diff --git a/include/vast/Conversion/Parser/default-parsers-config.yaml b/include/vast/Conversion/Parser/default-parsers-config.yaml index e2e12afaf3..a50525d494 100644 --- a/include/vast/Conversion/Parser/default-parsers-config.yaml +++ b/include/vast/Conversion/Parser/default-parsers-config.yaml @@ -58,6 +58,30 @@ - data # ... category: source +# const char* getprogname(void); +- function: getprogname + model: + return_type: data + arguments: [] + category: source + +# char* getcwd(char* buf, size_t size); +- function: getcwd + model: + return_type: data + arguments: + - data # char* buf + - nodata # size_t size + category: source + +# char* getenv(const char* name) +- function: getenv + model: + return_type: data + arguments: + - data # const char* name + category: source + # # Parser data sinks # @@ -81,6 +105,27 @@ - maybedata # ... category: sink +# int sprintf(char * restrict str, const char * restrict format, ...); +- function: sprintf + model: + return_type: nodata + arguments: + - nodata # char * restrict str + - maybedata # const char * restrict format + - maybedata # ... 
+ category: sink + +# size_t fwrite(const void *restrict ptr, size_t size, size_t nitems, FILE *restrict stream); +- function: fwrite + model: + return_type: nodata + arguments: + - maybedata # const void *restrict ptr + - maybedata # size_t size + - maybedata # size_t nitems + - nodata # FILE *restrict stream + category: sink + # void perror(const char *s); - function: perror model: @@ -104,11 +149,213 @@ arguments: - maybedata # const char * restrict filename - maybedata # const char * restrict mode + category: sink + +# off_t lseek(int fildes, off_t offset, int whence); +- function: lseek + model: + return_type: nodata + arguments: + - nodata # int fildes + - maybedata # off_t offset + - maybedata # int whence + category: sink + +# int fseeko(FILE *stream, off_t offset, int whence); +- function: fseeko + model: + return_type: nodata + arguments: + - maybedata # FILE* stream + - maybedata # off_t offset + - maybedata # whence + category: sink + +# long ftello(FILE *stream); +- function: ftello + model: + return_type: nodata + arguments: + - maybedata # FILE* stream + category: sink + +# int dirfd(DIR *dirp); +- function: dirfd + model: + return_type: nodata + arguments: + - maybedata # DIR *dirp + category: sink + +# int access(const char* file, int mode); +- function: access + model: + return_type: nodata + arguments: + - maybedata # const char* file + - maybedata # int mode + category: sink + +# int creat (const char *path, mode_t mode) +- function: creat + model: + return_type: nodata + arguments: + - maybedata # const char * path + - maybedata # mode_t mode + category: sink + +# TODO! Remove when issue 753 is resolved. 
+# int \01_creat (const char *path, mode_t mode) +- function: \01_creat + model: + return_type: nodata + arguments: + - maybedata # const char * path + - maybedata # mode_t mode + category: sink + +# size_t strlen(const char* s); +- function: strlen + model: + return_type: nodata + arguments: + - maybedata # const char *s + category: sink + +# size_t strnlen(const char* s, size_t maxlen); +- function: strnlen + model: + return_type: nodata + arguments: + - maybedata # const char * s + - nodata # size_t maxlen + category: sink + +# uint32_t __builtin_bswap32(uint32_t x); +- function: __builtin_bswap32 + model: + return_type: nodata + arguments: + - maybedata # uint32_t x + category: sink + +# uint64_t __builtin_bswap64(uint64_t x); +- function: __builtin_bswap64 + model: + return_type: nodata + arguments: + - maybedata # uint64_t x + category: sink + +# int fileno(FILE *stream); +- function: fileno + model: + return_type: nodata + arguments: + - maybedata # FILE *stream + category: sink + +# void flockfile(FILE *file); +- function: flockfile + model: + return_type: nodata + arguments: + - maybedata # FILE *file + category: sink + +# void funlockfile(FILE *file); +- function: funlockfile + model: + return_type: nodata + arguments: + - maybedata # FILE *file + category: sink + +# int fflush(FILE *stream); +- function: fflush + model: + return_type: nodata + arguments: + - maybedata # FILE *stream; + category: sink + +# int fpurge(FILE *stream); +- function: fpurge + model: + return_type: nodata + arguments: + - maybedata # FILE *stream; + category: sink + +# int putc(int c, FILE *stream); +- function: putc + model: + return_type: nodata + arguments: + - maybedata # int c + - maybedata # FILE *stream + category: sink + +# void qsort(void *base, size_t nel, size_t width, int (*compar)(const void*, const void*)); +- function: qsort + model: + return_type: nodata + arguments: + - maybedata # void* base + - nodata # size_t nel + - nodata # size_t width + - maybedata # int 
(*compar)(const void*, const void*) category: sink # # Parser functions -# +# + +# int memcmp(const void *s1, const void *s2, size_t n); +- function: memcmp + model: + return_type: nodata + arguments: + - data # const void* s1 + - data # const void* s2 + - data # size_t n + category: parser + +# int strcmp(const char *s1, const char *s2); +- function: strcmp + model: + return_type: nodata + arguments: + - data # const char* s1 + - data # const char* s2 + category: parser + +# int strncmp(const char *s1, const char *s2, size_t n); +- function: strncmp + model: + return_type: nodata + arguments: + - data # const char* s1 + - data # const char* s2 + - data # size_t n + category: parser + +# int isascii(int c) +- function: isascii + model: + return_type: nodata + arguments: + - data # int c + category: parser + +# int isupper(int c); +- function: isupper + model: + return_type: nodata + arguments: + - data # int c + category: parser # int isspace(int c); - function: isspace @@ -126,6 +373,70 @@ - data # int c category: parser +# size_t strspn(const char *s, const char *accept); +- function: strspn + model: + return_type: data + arguments: + - data # const char* s + - data # const char* accept + category: parser + +# size_t strcspn(const char *s, const char *charset); +- function: strcspn + model: + return_type: data + arguments: + - data # const char* s + - data # const char* charset + category: parser + +# char *strchr(const char* s, int c); +- function: strchr + model: + return_type: data + arguments: + - data # const char* s + - data # int c + category: parser + +# char *strrchr(const char* s, int c); +- function: strrchr + model: + return_type: data + arguments: + - data # const char* s + - data # int c + category: parser + +# void *memchr(const void *s, int c, size_t n); +- function: memchr + model: + return_type: data + arguments: + - data # const void* s + - data # int c + - data # size_t n + category: parser + +# void *memrchr(const void *s, int c, size_t n); 
+- function: memrchr + model: + return_type: data + arguments: + - data # const void* s + - data # int c + - data # size_t n + category: parser + +# int tolower(int c); +- function: tolower + model: + return_type: maybedata + arguments: + - maybedata # int c + category: parser + # # Non-parser functions # @@ -138,6 +449,38 @@ - nodata # int status category: nonparser +# int close(int fd); +- function: close + model: + return_type: nodata + arguments: + - nodata # int fd + category: nonparser + +# TODO: Remove when issue 753, 756 is resolved +# int \01_close(int fd); +- function: \01_close + model: + return_type: nodata + arguments: + - nodata # int fd + category: nonparser + +# int \01_closedir(DIR* dirp); +- function: \01_closedir + model: + return_type: nodata + arguments: + - nodata # DIR* dirp + category: nonparser + +# void __builtin_unreachable(void); +- function: __builtin_unreachable + model: + return_type: nodata + arguments: [] + category: nonparser + # void * malloc(size_t size); - function: malloc model: @@ -146,6 +489,24 @@ - nodata # size_t size category: nonparser +# void *realloc(void* ptr, size_t size); +- function: realloc + model: + return_type: nodata + arguments: + - nodata # void* ptr + - nodata # size_t size + category: nonparser + +# void * calloc(size_t count, size_t size); +- function: calloc + model: + return_type: nodata + arguments: + - nodata # size_t count + - nodata # size_t size + category: nonparser + # void fclose(FILE * stream); - function: fclose model: @@ -154,6 +515,253 @@ - nodata # FILE * stream category: nonparser +# int futimens(int fd, const struct timespec timespec[2]); +- function: futimens + model: + return_type: nodata + arguments: + - nodata # int fd + - nodata # const struct timespec timespec[2] + category: nonparser + +# int utimensat(int fd, const char *path, const struct timespec times[2], int flag); +- function: utimensat + model: + return_type: nodata + arguments: + - nodata # int fd + - nodata # const char 
*path + - nodata # const struct timespec times[2] + - nodata # int flag + category: nonparser + +# int utimes(const char *path, const struct timespec times[2]); +- function: utimes + model: + return_type: nodata + arguments: + - nodata # const char *path + - nodata # const struct timespec timespec[2] + category: nonparser + +# int futimes(int fildes, const struct timespec timespec[2]); +- function: futimes + model: + return_type: nodata + arguments: + - nodata # int fildes + - nodata # const struct timespec timespec[2] + category: nonparser + +# int fdutimens(int fd, char const *file, struct timespec const timespec[2]); +- function: fdutimens + model: + return_type: nodata + arguments: + - nodata # int fd + - nodata # char const *file + - nodata # struct timespec const timespec[2] + category: nonparser + +# int pipe(int pipefd[2]); +- function: pipe + model: + return_type: nodata + arguments: + - nodata # int pipefd[2] + category: nonparser + +# int chdir(const char *path); +- function: chdir + model: + return_type: nodata + arguments: + - nodata # const char *path + category: nonparser + +# int fchdir(int fd); +- function: fchdir + model: + return_type: nodata + arguments: + - nodata # int fd + category: nonparser + +# long __builtin_expect (long exp, long c); +- function: __builtin_expect + model: + return_type: nodata + arguments: + - nodata # long exp + - nodata # long c + category: nonparser + +# void __error(void); +- function: __error + model: + return_type: nodata + arguments: [] + category: nonparser + +# void error(int status, int errnum, const char* format, ...); +- function: error + model: + return_type: nodata + arguments: + - nodata # int status + - nodata # int errnum + - nodata # const char* format + - nodata # ... 
+ category: nonparser + +# void abort(void); +- function: abort + model: + return_type: nodata + arguments: [] + category: nonparser + +# int fcntl(int fd, int op, ...); +- function: fcntl + model: + return_type: nodata + arguments: + - nodata # int fd + - nodata # int op + - nodata # ... + category: nonparser + +# int \01_fcntl(int fd, int op, ...); +- function: \01_fcntl + model: + return_type: nodata + arguments: + - nodata # int fd + - nodata # int op + - nodata # ... + category: nonparser + +# int unlink(char const *name); +- function: unlink + model: + return_type: nodata + arguments: + - nodata # char const *name + category: nonparser + +# int unlinkat(int fd, const char *path, int flag); +- function: unlinkat + model: + return_type: nodata + arguments: + - nodata # int fd + - nodata # const char *path + - nodata # int flag + category: nonparser + +# size_t __builtin_object_size (const void * ptr, int type) +- function: __builtin_object_size + model: + return_type: nodata + arguments: + - nodata # const void* ptr + - nodata # int type + category: nonparser + +# void * __builtin___memset_chk (void *s, int c, size_t n, size_t os); +- function: __builtin___memset_chk + model: + return_type: nodata + arguments: + - nodata # void* s + - nodata # int c + - nodata # size_t n + - nodata # size_t os + category: nonparser + +# void * __builtin___memcpy_chk (void *dest, const void *src, size_t n, size_t os); +- function: __builtin___memcpy_chk + model: + return_type: nodata + arguments: + - nodata # void* dest + - nodata # const void* src + - nodata # size_t n + - nodata # size_t os + category: nonparser + +# int __builtin___snprintf_chk(char* s, size_t maxlen, int flag, size_t os, const char* fmt, ...); +- function: __builtin___snprintf_chk + model: + return_type: nodata + arguments: + - nodata # char* s + - nodata # size_t maxlen + - nodata # int flag + - nodata # size_t os + - nodata # const char* fmt + - nodata # ... 
+ category: nonparser + +# int __builtin___sprintf_chk(char* s, int flag, size_t os, const char* fmt, ...); +- function: __builtin___sprintf_chk + model: + return_type: nodata + arguments: + - nodata # char* s + - nodata # int flag + - nodata # size_t os + - nodata # const char* fmt + - nodata # ... + category: nonparser + +# char* __builtin___stpcpy_chk(char* dest, const char* src, size_t os); +- function: __builtin___stpcpy_chk + model: + return_type: nodata + arguments: + - nodata # char* dest + - nodata # const char* src + - nodata # size_t os + category: nonparser + +# char* __builtin___strcpy_chk(char* dest, const char* src, size_t os); +- function: __builtin___strcpy_chk + model: + return_type: nodata + arguments: + - nodata # char* dest + - nodata # const char* src + - nodata # size_t os + category: nonparser + +# int strerror_r (int errnum, char *strerrbuf, size_t buflen); +- function: strerror_r + model: + return_type: nodata + arguments: + - nodata # int errnum + - nodata # char *strerrbuf + - nodata # size_t buflen + category: nonparser + +# char *strerror(int errnum); +- function: strerror + model: + return_type: nodata + arguments: + - nodata # int errnum + category: nonparser + +# int clock_gettime(clockid_t clock_id, struct timespec *tp); +- function: clock_gettime + model: + return_type: nodata + arguments: + - nodata # clockid_t clock_id + - nodata # struct timespec* tp + category: nonparser + - function: main model: return_type: nodata diff --git a/include/vast/Conversion/Parser/gnulib-default-config.yaml b/include/vast/Conversion/Parser/gnulib-default-config.yaml new file mode 100644 index 0000000000..95a0e5263e --- /dev/null +++ b/include/vast/Conversion/Parser/gnulib-default-config.yaml @@ -0,0 +1,1392 @@ +# Copyright (c) 2024, Trail of Bits, Inc. 
+ +# +# Parser data sources +# + +# char * fgets(char * restrict str, int size, FILE * restrict stream); +- function: fgets + model: + return_type: data + arguments: + - data # char * restrict str + - nodata # int size + - nodata # FILE * restrict stream + category: source + +# size_t fread(void * restrict buffer, size_t size, size_t count, FILE * restrict stream); +- function: fread + model: + return_type: nodata + arguments: + - data # void * restrict buffer + - nodata # size_t size + - nodata # size_t count + - nodata # FILE * restrict stream + category: source + +# char * gets(char * str); +- function: gets + model: + return_type: data + arguments: + - data # char * str + category: source + +# char * gets_s(char * str, rsize_t n); +- function: gets_s + model: + return_type: data + arguments: + - data # char * str + - nodata # rsize_t n + category: source + +# int getchar(void); +- function: getchar + model: + return_type: data + arguments: [] + category: source + +# int scanf(const char * restrict format, ...); +- function: scanf + model: + return_type: nodata + arguments: + - nodata # const char * restrict format + - data # ... + category: source + +# const char* getprogname(void); +- function: getprogname + model: + return_type: data + arguments: [] + category: source + +# char* getcwd(char* buf, size_t size); +- function: getcwd + model: + return_type: data + arguments: + - data # char* buf + - nodata # size_t size + category: source + +# char* getenv(const char* name) +- function: getenv + model: + return_type: data + arguments: + - data # const char* name + category: source + +# +# Parser data sinks +# + +# int printf(const char * restrict format, ...); +- function: printf + model: + return_type: nodata + arguments: + - maybedata # const char * restrict format + - maybedata # ... 
+ category: sink + +# int fprintf(FILE * restrict stream, const char * restrict format, ...); +- function: fprintf + model: + return_type: nodata + arguments: + - nodata # FILE * restrict stream + - maybedata # const char * restrict format + - maybedata # ... + category: sink + +# int sprintf(char * restrict str, const char * restrict format, ...); +- function: sprintf + model: + return_type: nodata + arguments: + - nodata # char * restrict str + - maybedata # const char * restrict format + - maybedata # ... + category: sink + +# char *vasnprintf(char *resultbuf, size_t *lengthp, const char *format, va_list args) +- function: vasnprintf + model: + return_type: nodata + arguments: + - nodata # char *resultbuf + - maybedata # size_t *lengthp + - maybedata # const char *format + - maybedata # va_list args + category: sink + +# int rpl_fprintf(FILE * restrict stream, const char * restrict format, ...); +- function: rpl_fprintf + model: + return_type: nodata + arguments: + - nodata # FILE * restrict stream + - maybedata # const char * restrict format + - maybedata # ... 
+ category: sink + +# int rpl_vfprintf(FILE * restrict stream, const char * restrict format, va_list ap); +- function: rpl_vfprintf + model: + return_type: nodata + arguments: + - maybedata # FILE * restrict stream + - nodata # const char * restrict format + - maybedata # va_list ap + category: sink + +# size_t fwrite(const void *restrict ptr, size_t size, size_t nitems, FILE *restrict stream); +- function: fwrite + model: + return_type: nodata + arguments: + - maybedata # const void *restrict ptr + - maybedata # size_t size + - maybedata # size_t nitems + - nodata # FILE *restrict stream + category: sink + +# int safe_copy(char *buf, size_t buflen, const char *msg); +- function: safe_copy + model: + return_type: nodata + arguments: + - nodata # char *buf + - nodata # buflen + - maybedata # const char *msg + category: sink + +# void perror(const char *s); +- function: perror + model: + return_type: nodata + arguments: + - maybedata # const char *s + category: sink + +# void rpl_perror(const char *s); +- function: rpl_perror + model: + return_type: nodata + arguments: + - maybedata # const char *s + category: sink + +# void fseterr(FILE *fp); +- function: fseterr + model: + return_type: nodata + arguments: + - maybedata # FILE *fp + category: sink + +# void free(void * ptr); +- function: free + model: + return_type: nodata + arguments: + - maybedata # void * ptr + category: sink + +# FILE * fopen(const char * restrict filename, const char * restrict mode); +- function: fopen + model: + return_type: nodata + arguments: + - maybedata # const char * restrict filename + - maybedata # const char * restrict mode + category: sink + +# int rpl_open(const char *filename, int flags, mode_t mode) +- function: rpl_open + model: + return_type: nodata + arguments: + - maybedata # const char* filename + - maybedata # int flags + - maybedata # mode_t mode + category: sink + +# int orig_open(const char *filename, int flags, mode_t mode) +- function: orig_open + model: + return_type: 
nodata + arguments: + - maybedata # const char* filename + - maybedata # int flags + - maybedata # mode_t mode + category: sink + +# int orig_openat(int fd, char const *filename, int flags, mode_t mode); +- function: orig_openat + model: + return_type: nodata + arguments: + - nodata # int fd + - maybedata # const char* filename + - maybedata # int flags + - maybedata # mode_t mode + category: sink + +# int rpl_stat(const char* filename, struct stat *buf); +- function: rpl_stat + model: + return_type: nodata + arguments: + - maybedata # const char* filename + - nodata # struct stat *buf + category: sink + +# int orig_stat(const char* filename, struct stat *buf); +- function: orig_stat + model: + return_type: nodata + arguments: + - maybedata # const char* filename + - nodata # struct stat *buf + category: sink + +# int rpl_fstatat(int fd, char const *filename, struct stat *buf, int flags) +- function: rpl_fstatat + model: + return_type: nodata + arguments: + - nodata # int fd + - maybedata # char const *filename + - nodata # struct stat *buf + - maybedata # int flags + category: sink + +# int orig_fstatat(int fd, char const *filename, struct stat *buf, int flags) +- function: orig_fstatat + model: + return_type: nodata + arguments: + - nodata # int fd + - maybedata # char const *filename + - nodata # struct stat *buf + - maybedata # int flags + category: sink + +# int rpl_lstat(const char *filename, struct stat *buf) +- function: rpl_lstat + model: + return_type: nodata + arguments: + - maybedata # char const *filename + - nodata # struct stat *buf + category: sink + +# int orig_lstat(const char *filename, struct stat *buf) +- function: orig_lstat + model: + return_type: nodata + arguments: + - maybedata # char const *filename + - nodata # struct stat *buf + category: sink + +# int normal_fstatat(int fd, char const *file, struct stat *st, int flag) +- function: normal_fstatat + model: + return_type: nodata + arguments: + - nodata # int fd + - maybedata # char const 
*file + - nodata # struct stat *st + - maybedata # int flag + category: sink + +# int open_safer(char const *file, int flags, ...) +- function: open_safer + model: + return_type: nodata + arguments: + - maybedata # char const *file + - maybedata # int flags + - nodata # ... + category: sink + +# DIR *opendir_safer(const char* name); +- function: opendir_safer + model: + return_type: nodata + arguments: + - maybedata # const char *name + category: sink + +# bool freading (FILE *fp) +- function: freading + model: + return_type: nodata + arguments: + - maybedata # FILE* fp + category: sink + +# off_t lseek(int fildes, off_t offset, int whence); +- function: lseek + model: + return_type: nodata + arguments: + - nodata # int fildes + - maybedata # off_t offset + - maybedata # int whence + category: sink + +# int fseeko(FILE *stream, off_t offset, int whence); +- function: fseeko + model: + return_type: nodata + arguments: + - maybedata # FILE* stream + - maybedata # off_t offset + - maybedata # whence + category: sink + +# int rpl_fseeko(FILE *stream, off_t offset, int whence); +- function: rpl_fseeko + model: + return_type: nodata + arguments: + - maybedata # FILE* stream + - maybedata # off_t offset + - maybedata # whence + category: sink + + +# long ftello(FILE *stream); +- function: ftello + model: + return_type: nodata + arguments: + - maybedata # FILE* stream + category: sink + +# int dirfd(DIR *dirp); +- function: dirfd + model: + return_type: nodata + arguments: + - maybedata # DIR *dirp + category: sink + +# int cdb_advance_fd(struct cd_buf *cdb, char const *dir); +- function: cdb_advance_fd + model: + return_type: nodata + arguments: + - nodata # struct cd_buf *cdb + - maybedata # char const* dir + category: sink + +# int access(const char* file, int mode); +- function: access + model: + return_type: nodata + arguments: + - maybedata # const char* file + - maybedata # int mode + category: sink + +# size_t dirlen(char const* file); +- function: dirlen + model: + 
return_type: nodata + arguments: + - maybedata # char const *file + category: sink + +# int creat (const char *path, mode_t mode) +- function: creat + model: + return_type: nodata + arguments: + - maybedata # const char * path + - maybedata # mode_t mode + category: sink + +# TODO! Remove when issue 753 is resolved. +# int \01_creat (const char *path, mode_t mode) +- function: \01_creat + model: + return_type: nodata + arguments: + - maybedata # const char * path + - maybedata # mode_t mode + category: sink + +# int rpl_openat (int dfd, char const *filename, int flags, ...) +- function: rpl_openat + model: + return_type: nodata + arguments: + - maybedata # int dfd + - maybedata # char const* filename + - maybedata # int flags + - maybedata # ... + category: sink + +# size_t strlen(const char* s); +- function: strlen + model: + return_type: nodata + arguments: + - maybedata # const char *s + category: sink + +# size_t strnlen(const char* s, size_t maxlen); +- function: strnlen + model: + return_type: nodata + arguments: + - maybedata # const char * s + - nodata # size_t maxlen + category: sink + +# uint32_t __builtin_bswap32(uint32_t x); +- function: __builtin_bswap32 + model: + return_type: nodata + arguments: + - maybedata # uint32_t x + category: sink + +# uint64_t __builtin_bswap64(uint64_t x); +- function: __builtin_bswap64 + model: + return_type: nodata + arguments: + - maybedata # uint64_t x + category: sink + +# int fileno(FILE *stream); +- function: fileno + model: + return_type: nodata + arguments: + - maybedata # FILE *stream + category: sink + +# void flockfile(FILE *file); +- function: flockfile + model: + return_type: nodata + arguments: + - maybedata # FILE *file + category: sink + +# void funlockfile(FILE *file); +- function: funlockfile + model: + return_type: nodata + arguments: + - maybedata # FILE *file + category: sink + +# int fflush(FILE *stream); +- function: fflush + model: + return_type: nodata + arguments: + - maybedata # FILE *stream; + 
category: sink + +# int fpurge(FILE *stream); +- function: fpurge + model: + return_type: nodata + arguments: + - maybedata # FILE *stream; + category: sink + +# int rpl_fpurge(FILE* stream); +- function: rpl_fpurge + model: + return_type: nodata + arguments: + - maybedata # FILE *stream; + category: sink + +# int rpl_fflush(FILE *stream); +- function: rpl_fflush + model: + return_type: nodata + arguments: + - maybedata # FILE *stream + category: sink + +# void clear_ungetc_buffer(FILE *fp); +- function: clear_ungetc_buffer + model: + return_type: nodata + arguments: + - maybedata # FILE *fp + category: sink + +# int putc(int c, FILE *stream); +- function: putc + model: + return_type: nodata + arguments: + - maybedata # int c + - maybedata # FILE *stream + category: sink + +# void qsort(void *base, size_t nel, size_t width, int (*compar)(const void*, const void*)); +- function: qsort + model: + return_type: nodata + arguments: + - maybedata # void* base + - nodata # size_t nel + - nodata # size_t width + - maybedata # int (*compar)(const void*, const void*) + category: sink + +# +# Parser functions +# + +# int stat_time_normalize(int result, struct stat* st); +- function: stat_time_normalize + model: + return_type: nodata + arguments: + - data # int result + - nodata # struct stat* st + category: parser + +# int memcmp(const void *s1, const void *s2, size_t n); +- function: memcmp + model: + return_type: nodata + arguments: + - data # const void* s1 + - data # const void* s2 + - data # size_t n + category: parser + +# int strcmp(const char *s1, const char *s2); +- function: strcmp + model: + return_type: nodata + arguments: + - data # const char* s1 + - data # const char* s2 + category: parser + +# int strncmp(const char *s1, const char *s2, size_t n); +- function: strncmp + model: + return_type: nodata + arguments: + - data # const char* s1 + - data # const char* s2 + - data # size_t n + category: parser + +# int isascii(int c) +- function: isascii + model: + 
return_type: nodata + arguments: + - data # int c + category: parser + +# int isupper(int c); +- function: isupper + model: + return_type: nodata + arguments: + - data # int c + category: parser + +# int isspace(int c); +- function: isspace + model: + return_type: nodata + arguments: + - data # int c + category: parser + +# int isdigit(int c); +- function: isdigit + model: + return_type: nodata + arguments: + - data # int c + category: parser + +# size_t strspn(const char *s, const char *accept); +- function: strspn + model: + return_type: data + arguments: + - data # const char* s + - data # const char* accept + category: parser + +# size_t strcspn(const char *s, const char *charset); +- function: strcspn + model: + return_type: data + arguments: + - data # const char* s + - data # const char* charset + category: parser + +# char *strchr(const char* s, int c); +- function: strchr + model: + return_type: data + arguments: + - data # const char* s + - data # int c + category: parser + +# char *strrchr(const char* s, int c); +- function: strrchr + model: + return_type: data + arguments: + - data # const char* s + - data # int c + category: parser + +# void *memchr(const void *s, int c, size_t n); +- function: memchr + model: + return_type: data + arguments: + - data # const void* s + - data # int c + - data # size_t n + category: parser + +# void *memrchr(const void *s, int c, size_t n); +- function: memrchr + model: + return_type: data + arguments: + - data # const void* s + - data # int c + - data # size_t n + category: parser + +# size_t base_len(char const *name); +- function: base_len + model: + return_type: data + arguments: + - data # char const *name + category: parser + +# char* last_component(char const* name); +- function: last_component + model: + return_type: data + arguments: + - data # char const* name + category: parser + +# int tolower(int c); +- function: tolower + model: + return_type: maybedata + arguments: + - maybedata # int c + category: parser 
+ +# +# Non-parser functions +# + +# void exit(int status); +- function: exit + model: + return_type: nodata + arguments: + - nodata # int status + category: nonparser + +# int close(int fd); +- function: close + model: + return_type: nodata + arguments: + - nodata # int fd + category: nonparser + +# TODO: Remove when issue 753, 756 is resolved +# int \01_close(int fd); +- function: \01_close + model: + return_type: nodata + arguments: + - nodata # int fd + category: nonparser + +# int \01_closedir(DIR* dirp); +- function: \01_closedir + model: + return_type: nodata + arguments: + - nodata # DIR* dirp + category: nonparser + +# void __builtin_unreachable(void); +- function: __builtin_unreachable + model: + return_type: nodata + arguments: [] + category: nonparser + +# char *streamsavedir(DIR *dirp, enum savedir_option option); +- function: streamsavedir + model: + return_type: nodata + arguments: + - nodata # DIR *dirp + - nodata # savedir_option option + category: nonparser + +# void * malloc(size_t size); +- function: malloc + model: + return_type: nodata + arguments: + - nodata # size_t size + category: nonparser + +# void* xmalloc(size_t s); +- function: xmalloc + model: + return_type: nodata + arguments: + - nodata # size_t s + category: nonparser + +# void* xnmalloc(size_t n, size_t s); +- function: xnmalloc + model: + return_type: nodata + arguments: + - nodata # size_t n + - nodata # size_t s + category: nonparser + +# void *realloc(void* ptr, size_t size); +- function: realloc + model: + return_type: nodata + arguments: + - nodata # void* ptr + - nodata # size_t size + category: nonparser + +# void* xrealloc(void* p, size_t s); +- function: xrealloc + model: + return_type: nodata + arguments: + - nodata # void* p + - nodata # size_t s + category: nonparser + +# void *x2nrealloc(void *p, size_t *pn, size_t s) +- function: x2nrealloc + model: + return_type: nodata + arguments: + - nodata # void *p + - nodata # size_t *pn + - nodata # size_t s + category: 
nonparser + +# void * calloc(size_t count, size_t size); +- function: calloc + model: + return_type: nodata + arguments: + - nodata # size_t count + - nodata # size_t size + category: nonparser + +# void *xcalloc(size_t n, size_t s); +- function: xcalloc + model: + return_type: nodata + arguments: + - nodata # size_t n + - nodata # size_t s + category: nonparser + +# void rpl_calloc(size_t count, size_t size); +- function: rpl_calloc + model: + return_type: nodata + arguments: + - nodata # size_t count + - nodata # size_t size + category: nonparser + +# void *xmemdup(void const *p, size_t s); +- function: xmemdup + model: + return_type: nodata + arguments: + - nodata # void const *p + - nodata # size_t s + category: nonparser + +# char *xstrdup(char const *string); +- function: xstrdup + model: + return_type: nodata + arguments: + - nodata # char const *string + category: nonparser + +# void fclose(FILE * stream); +- function: fclose + model: + return_type: nodata + arguments: + - nodata # FILE * stream + category: nonparser + +# int fd_safer(int fd); +- function: fd_safer + model: + return_type: nodata + arguments: + - nodata # int fd + category: nonparser + +# int fd_safer_flag(int fd, int flag); +- function: fd_safer_flag + model: + return_type: nodata + arguments: + - nodata # int fd + - nodata # int flag + category: nonparser + +# int set_cloexec_flag(int desc, bool value); +- function: set_cloexec_flag + model: + return_type: nodata + arguments: + - nodata # int desc + - nodata # bool value + category: nonparser + +# int is_open(int fd) +- function: is_open + model: + return_type: nodata + arguments: + - nodata # int fd + category: nonparser + +# int futimens(int fd, const struct timespec timespec[2]); +- function: futimens + model: + return_type: nodata + arguments: + - nodata # int fd + - nodata # const struct timespec timespec[2] + category: nonparser + +# int utimensat(int fd, const char *path, const struct timespec times[2], int flag); +- function: 
utimensat + model: + return_type: nodata + arguments: + - nodata # int fd + - nodata # const char *path + - nodata # const struct timespec times[2] + - nodata # int flag + category: nonparser + +# int utimes(const char *path, const struct timeval times[2]); +- function: utimes + model: + return_type: nodata + arguments: + - nodata # const char *path + - nodata # const struct timeval times[2] + category: nonparser + +# int futimes(int fildes, const struct timeval times[2]); +- function: futimes + model: + return_type: nodata + arguments: + - nodata # int fildes + - nodata # const struct timeval times[2] + category: nonparser + +# int fdutimens(int fd, char const *file, struct timespec const timespec[2]); +- function: fdutimens + model: + return_type: nodata + arguments: + - nodata # int fd + - nodata # char const *file + - nodata # struct timespec const timespec[2] + category: nonparser + +# int validate_timespec(struct timespec timespec[2]) +- function: validate_timespec + model: + return_type: nodata + arguments: + - nodata # struct timespec timespec[2] + category: nonparser + +# bool update_timespec(struct stat const *statbuf, struct timespec **ts); +- function: update_timespec + model: + return_type: nodata + arguments: + - nodata # struct stat const *statbuf + - nodata # struct timespec **ts + category: nonparser + +# struct timespec get_stat_atime(struct stat const* st); +- function: get_stat_atime + model: + return_type: nodata + arguments: + - nodata # struct stat const* st + category: nonparser + +# int pipe(int pipefd[2]); +- function: pipe + model: + return_type: nodata + arguments: + - nodata # int pipefd[2] + category: nonparser + +# int chdir(const char *path); +- function: chdir + model: + return_type: nodata + arguments: + - nodata # const char *path + category: nonparser + +# int chdir_long(char *dir); +- function: chdir_long + model: + return_type: nodata + arguments: + - nodata # char *dir + category: nonparser + +# int fchdir(int 
fd); +- function: fchdir + model: + return_type: nodata + arguments: + - nodata # int fd + category: nonparser + +# int cdb_fchdir(struct cd_buf const *cdb); +- function: cdb_fchdir + model: + return_type: nodata + arguments: + - nodata # struct cd_buf const *cdb; + category: nonparser + +# int lstatat(int fd, char const *name, struct stat *st) +- function: lstatat + model: + return_type: nodata + arguments: + - nodata # int fd + - nodata # char const *name + - nodata # struct stat *st + category: nonparser + +# long __builtin_expect (long exp, long c); +- function: __builtin_expect + model: + return_type: nodata + arguments: + - nodata # long exp + - nodata # long c + category: nonparser + +# int __istype(__darwin_ct_rune_t _c, unsigned long _f); +- function: __istype + model: + return_type: nodata + arguments: + - nodata # __darwin_ct_rune_t _c + - nodata # unsigned long _f + category: nonparser + +# __darwin_ct_rune_t __isctype(__darwin_ct_rune_t _c, unsigned long _f); +- function: __isctype + model: + return_type: nodata + arguments: + - nodata # __darwin_ct_rune_t _c + - nodata # unsigned long _f + category: nonparser + +# int __maskrune(__darwin_ct_rune_t _c, unsigned long _f); +- function: __maskrune + model: + return_type: nodata + arguments: + - nodata # __darwin_ct_rune_t _c + - nodata # unsigned long _f + category: nonparser + +# __darwin_ct_rune_t __tolower(__darwin_ct_rune_t _c); +- function: __tolower + model: + return_type: nodata + arguments: + - nodata # __darwin_ct_rune_t _c + category: nonparser + +# __darwin_ct_rune_t __toupper(__darwin_ct_rune_t _c); +- function: __toupper + model: + return_type: nodata + arguments: + - nodata # __darwin_ct_rune_t _c + category: nonparser + +# const char* _getopt_initialize(int argc, char **argv, const char* optstring, struct _getopt_data *d, int posixly_correct); +- function: _getopt_initialize + model: + return_type: nodata + arguments: + - nodata # int argc + - nodata # char** argv + - nodata # const char* 
optstring + - nodata # struct _getopt_data* d + - nodata # int posixly_correct + category: nonparser + +# int rpl_getopt_internal (int ___argc, char **___argv, const char *__shortopts, const struct option *__longopts, int *__longind, int __long_only, int __posixly_correct); +- function: rpl_getopt_internal + model: + return_type: nodata + arguments: + - nodata # int ___argc + - nodata # char** ___argv + - nodata # const char* __shortopts + - nodata # const struct option *__longopts + - nodata # int *__longind + - nodata # int __long_only + - nodata # int __posixly_correct + category: nonparser + +# int _getopt_internal_r (int ___argc, char **___argv, const char *__shortopts, const struct option *__longopts, int *__longind, int __long_only, struct _getopt_data *__data, int __posixly_correct); +- function: _getopt_internal_r + model: + return_type: nodata + arguments: + - nodata # int ___argc + - nodata # char** ___argv + - nodata # const char* __shortopts + - nodata # const struct option *__longopts + - nodata # int *__longind + - nodata # int __long_only + - nodata # struct _getopt_data *__data + - nodata # int __posixly_correct + category: nonparser + +# process_long_option (int argc, char **argv, const char *optstring, const struct option *longopts, int *longind, int long_only, struct _getopt_data *d, int print_errors, const char *prefix) +- function: process_long_option + model: + return_type: nodata + arguments: + - nodata # int argc + - nodata # char **argv + - nodata # const char* optstring + - nodata # const struct option* longopts + - nodata # int* longind + - nodata # int long_only + - nodata # struct _getopt_data* d + - nodata # int print_errors + - nodata # const char* prefix + category: nonparser + +# void __error(void); +- function: __error + model: + return_type: nodata + arguments: [] + category: nonparser + +# void error(int status, int errnum, const char* format, ...); +- function: error + model: + return_type: nodata + arguments: + - nodata # int status + - nodata # int 
errnum + - nodata # const char* format + - nodata # ... + category: nonparser + +# void xalloc_die(void); +- function: xalloc_die + model: + return_type: nodata + arguments: [] + category: nonparser + +# __assert_rtn(const char *func, const char *file, int line, const char *failedexpr) +- function: __assert_rtn + model: + return_type: nodata + arguments: + - nodata # const char* func + - nodata # const char* file + - nodata # int line (of code, not file) + - nodata # const char* failedexpr + category: nonparser + +# void abort(void); +- function: abort + model: + return_type: nodata + arguments: [] + category: nonparser + +# void cdb_init(struct cd_buf *cdb); +- function: cdb_init + model: + return_type: nodata + arguments: + - nodata # struct cd_buf *cdb; + category: nonparser + +# int cdb_free(struct cdb *c); +- function: cdb_free + model: + return_type: nodata + arguments: + - nodata # struct cdb *c + category: nonparser + +# int fcntl(int fd, int op, ...); +- function: fcntl + model: + return_type: nodata + arguments: + - nodata # int fd + - nodata # int op + - nodata # ... + category: nonparser + +# int \01_fcntl(int fd, int op, ...); +- function: \01_fcntl + model: + return_type: nodata + arguments: + - nodata # int fd + - nodata # int op + - nodata # ... 
+ category: nonparser + +# int unlink(char const *name); +- function: unlink + model: + return_type: nodata + arguments: + - nodata # char const *name + category: nonparser + +# int rpl_unlink(char const *name); +- function: rpl_unlink + model: + return_type: nodata + arguments: + - nodata # char const *name + category: nonparser + +# int unlinkat(int fd, const char *path, int flag); +- function: unlinkat + model: + return_type: nodata + arguments: + - nodata # int fd + - nodata # const char *path + - nodata # int flag + category: nonparser + +# size_t __builtin_object_size (const void * ptr, int type) +- function: __builtin_object_size + model: + return_type: nodata + arguments: + - nodata # const void* ptr + - nodata # int type + category: nonparser + +# void * __builtin___memset_chk (void *s, int c, size_t n, size_t os); +- function: __builtin___memset_chk + model: + return_type: nodata + arguments: + - nodata # void* s + - nodata # int c + - nodata # size_t n + - nodata # size_t os + category: nonparser + +# void * __builtin___memcpy_chk (void *dest, const void *src, size_t n, size_t os); +- function: __builtin___memcpy_chk + model: + return_type: nodata + arguments: + - nodata # void* dest + - nodata # const void* src + - nodata # size_t n + - nodata # size_t os + category: nonparser + +# int __builtin___snprintf_chk(char* s, size_t maxlen, int flag, size_t os, const char* fmt, ...); +- function: __builtin___snprintf_chk + model: + return_type: nodata + arguments: + - nodata # char* s + - nodata # size_t maxlen + - nodata # int flag + - nodata # size_t os + - nodata # const char* fmt + - nodata # ... + category: nonparser + +# int __builtin___sprintf_chk(char* s, int flag, size_t os, const char* fmt, ...); +- function: __builtin___sprintf_chk + model: + return_type: nodata + arguments: + - nodata # char* s + - nodata # int flag + - nodata # size_t os + - nodata # const char* fmt + - nodata # ... 
+ category: nonparser + +# char* __builtin___stpcpy_chk(char* dest, const char* src, size_t os); +- function: __builtin___stpcpy_chk + model: + return_type: nodata + arguments: + - nodata # char* dest + - nodata # const char* src + - nodata # size_t os + category: nonparser + +# char* __builtin___strcpy_chk(char* dest, const char* src, size_t os); +- function: __builtin___strcpy_chk + model: + return_type: nodata + arguments: + - nodata # char* dest + - nodata # const char* src + - nodata # size_t os + category: nonparser + +# int strerror_r (int errnum, char *strerrbuf, size_t buflen); +- function: strerror_r + model: + return_type: nodata + arguments: + - nodata # int errnum + - nodata # char *strerrbuf + - nodata # size_t buflen + category: nonparser + +# int rpl_strerror_r (int __errnum, char *__strerrbuf, size_t __buflen); +- function: rpl_strerror_r + model: + return_type: nodata + arguments: + - nodata # int __errnum + - nodata # char *__strerrbuf + - nodata # size_t __buflen + category: nonparser + +# char *strerror(int errnum); +- function: strerror + model: + return_type: nodata + arguments: + - nodata # int errnum + category: nonparser + +# const char* strerror_override(int errnum) +- function: strerror_override + model: + return_type: nodata + arguments: + - nodata # int errnum + category: nonparser + +# void gettime(struct timespec *ts); +- function: gettime + model: + return_type: nodata + arguments: + - nodata # struct timespec *ts + category: nonparser + +# int clock_gettime(clockid_t clock_id, struct timespec *tp); +- function: clock_gettime + model: + return_type: nodata + arguments: + - nodata # clockid_t clock_id + - nodata # struct timespec* tp + category: nonparser + +# void print_errno_message(int errnum) +- function: print_errno_message + model: + return_type: nodata + arguments: + - nodata # int errnum + category: nonparser + +# size_t xsum(size_t size1, size_t size2); +- function: xsum + model: + return_type: nodata + arguments: + - nodata # size_t size1 
+ - nodata # size_t size2 + category: nonparser + +- function: main + model: + return_type: nodata + arguments: + - nodata # int argc + - data # char * argv[] + - data # char * envp[] + category: nonparser diff --git a/tools/vast-query/extract_func.py b/tools/vast-query/extract_func.py new file mode 100644 index 0000000000..1b06016bb5 --- /dev/null +++ b/tools/vast-query/extract_func.py @@ -0,0 +1,199 @@ +#!/usr/bin/env python3 +# primitive call/def grabber tool + +import sys +import os +from typing import List +import re + +class location: + def __init__(self, reference: int, filename: str, line: int): + self.reference = reference + self.filename = filename + self.line = line + + def __str__(self): + return "#loc" + self.reference + " = " + self.filename + ":" + str(self.line) + + @staticmethod + def parse(literal: str): + if m := re.match(r'#loc(\d+) = loc\(("[^"]+"):(\d+):(\d+)\)', literal): + return location(m.group(1), m.group(2), int(m.group(3))) + + return None + +class fdef: + def __init__(self, signature: str, line: int, filename: str, ref): + self.signature = signature + self.line = line + self.ref = ref + self.filename = filename + + @staticmethod + def parse(literal: str, line: int, filename: str): + if m := re.search(r"hl.func @([^:^\s]+)", literal): + mm = re.search(r"loc\(#loc(\d+)\)", literal) + if mm == None: + mm = re.search(r'loc\(("[^"]+"):(\d+):(\d+)\)', literal) + if mm == None: + print(f"\033[93mWARNING Assuming '{literal.strip()}' ({filename}:{line}) as local function\033[0m ") + return fdef(m.group(1), line, filename, None) + else: + return fdef(m.group(1), line, filename, mm.group(1)) + else: + return fdef(m.group(1), line, filename, mm.group(1)) + + return None + + def __str__(self): + return self.signature + ":" + self.filename + ":" + str(self.line) + +class fcall: + def __init__(self, definition: fdef, line: int, filename: str, ref = None): + self.function = definition + self.line = line + self.ref = ref + self.filename = filename + + 
@staticmethod + def parse(literal: str, line: int, filename: str): + if m := re.search(r"hl.call @([^\)]+\))", literal): + mm = re.search(r"loc\(#loc(\d+)\)", literal) + if mm == None: + print(f"Error: function call '{literal}' does not have refernce check") + exit(1) + return fcall(m.group(1), line, filename, mm.group(1)) + else: + print(f"literal {literal} is not a valid function call.") + return None + + def __str__(self): + return self.function.__str__() + " call" + + +def smart_parse(line: str, loc: int, filename: str): + if re.search(r"hl.call @([^:^\s]+)", line): + return fcall.parse(line, loc, filename) + if re.match(r'#loc(\d+) = loc\(("[^"]+"):(\d+):(\d+)\)', line): + return location.parse(line) + if re.search(r"hl.func @([^:^\s]+)", line): + return fdef.parse(line, loc, filename) + + return None + +def main(): + if len(sys.argv) != 3: + print("USAGE: python3 extract_func.py ") + exit(0) + + if not os.path.exists(sys.argv[1]): + print(f"file {sys.argv[1]} does not exist.") + exit(1) + + locations: List[location] = [] + functions: List[fdef] = [] + calls: List[fcall] = [] + + files = [] + + if os.path.isfile(sys.argv[1]): + files = [sys.argv[1]] + else: + files = [f for f in os.listdir(sys.argv[1]) if os.path.isfile(sys.argv[1] + f) and f.endswith(".mlir")] + + for file in files: + with open(sys.argv[1] + file, 'r') as f: + for (index, line) in enumerate(f.readlines()): + obj = smart_parse(line, index+1, file) + + if not obj: continue + + if isinstance(obj, location): + # line location -> location info. 
+ locations.append(obj) + elif isinstance(obj, fdef): + functions.append(obj) + elif isinstance(obj, fcall): + calls.append(obj) + + f.close() + + consolidated_calls = {} + + for function_call in calls: + if function_call.function in consolidated_calls.keys(): + consolidated_calls[function_call.function][0].append((function_call.filename, function_call.line)) + consolidated_calls[function_call.function][1].append(function_call.ref) + else: + consolidated_calls[function_call.function] = [[(function_call.filename, function_call.line)], [function_call.ref]] + + # sort + consolidated_calls = dict(sorted(consolidated_calls.items())) + + with open(sys.argv[2], "w") as f: + + for function in consolidated_calls.keys(): + + # gathering function calls... + filerefs = set() + + for refs in consolidated_calls[function][1]: + found = False + for l in locations: + if l.reference == refs: + filerefs.add(l.filename) + found = True + break + if not found: + print(f"cannot find ref {refs}!") + exit(1) + + defs = set() + + function_friendly_name = re.search(r"[^\(]+", function).group(0) + + for definition in functions: + if definition.signature == function_friendly_name: + defs.add((definition.filename, definition.line)) + + if definition.ref == None: + continue + + found = False + for l in locations: + if l.reference == definition.ref: + filerefs.add(l.filename) + found = True + break + if not found: + if os.path.isfile(definition.ref.replace("\"", "").replace("\'", "")): + filerefs.add(definition.ref) + continue + print(f"\033[93mcannot find file or ref '{definition.ref}' of {definition.signature} ({definition.filename}:{definition.line})\033[0m") + filerefs.add(definition.ref) + + + # write to yaml + consolidated_calls[function][0].sort() + + f.write(f" - signature: {function}\n\t call lines: ") + + f.writelines(["\n\t\t- " + i[0] + ":" + str(i[1]) if i != None else "" for i in consolidated_calls[function][0]]) + + f.write("\n\t definitions: ") + + f.writelines(["\n\t\t- " + d[0] 
+ ":" + str(d[1]) if d != None else "" for d in defs]) + + f.write("\n\t file references: ") + + f.writelines(["\n\t\t- " + (f if f != None else "unknown") for f in filerefs]) + + f.write("\n") + + print(f"\033[92mWrote to {sys.argv[2]}\033[0m") + +if __name__ == '__main__': + if sys.version_info < (3, 8): + print("Python version above 3.8 is required.") + exit(1) + main() \ No newline at end of file