/*  Tarlz - Archiver with multimember lzip compression
    Copyright (C) 2013-2019 Antonio Diaz Diaz.

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

// Size in bytes of one tar header block.
enum { header_size = 512 };

// Largest file size accepted: LLONG_MAX minus the size of one header.
#define max_file_size ( LLONG_MAX - header_size )

// Raw storage for one tar header block.
typedef uint8_t Tar_header[header_size];

// Offset in bytes of every field inside a tar header.
enum Offsets {
  name_o     =   0,
  mode_o     = 100,
  uid_o      = 108,
  gid_o      = 116,
  size_o     = 124,
  mtime_o    = 136,
  chksum_o   = 148,
  typeflag_o = 156,
  linkname_o = 157,
  magic_o    = 257,
  version_o  = 263,
  uname_o    = 265,
  gname_o    = 297,
  devmajor_o = 329,
  devminor_o = 337,
  prefix_o   = 345 };

// Length in bytes of every field inside a tar header.
enum Lengths {
  name_l     = 100,
  mode_l     =   8,
  uid_l      =   8,
  gid_l      =   8,
  size_l     =  12,
  mtime_l    =  12,
  chksum_l   =   8,
  typeflag_l =   1,
  linkname_l = 100,
  magic_l    =   6,
  version_l  =   2,
  uname_l    =  32,
  gname_l    =  32,
  devmajor_l =   8,
  devminor_l =   8,
  prefix_l   = 155 };

// Values of the one-byte typeflag field of a tar header.
enum Typeflag {
  tf_regular   = '0',
  tf_link      = '1',
  tf_symlink   = '2',
  tf_chardev   = '3',
  tf_blockdev  = '4',
  tf_directory = '5',
  tf_fifo      = '6',
  tf_hiperf    = '7',
  tf_global    = 'g',		// global extended header
  tf_extended  = 'x' };		// extended header

// Contents of the magic field of a header. All magic_l (6) bytes are
// significant, including the terminating zero byte.
const uint8_t ustar_magic[magic_l] =
  { 0x75, 0x73, 0x74, 0x61, 0x72, 0 };			// "ustar\0"

// Returns true if the magic field of 'header' matches all magic_l bytes
// of ustar_magic.
inline bool verify_ustar_magic( const uint8_t * const header )
  {
  const uint8_t * const field = header + magic_o;
  return ( std::memcmp( field, ustar_magic, magic_l ) == 0 );
  }

// Clears the whole header, then sets the magic and version fields.
inline void init_tar_header( Tar_header header )
  {
  std::memset( header, 0, header_size );
  // only the 5 letters are copied; the memset already stored the final 0
  std::memcpy( header + magic_o, ustar_magic, magic_l - 1 );
  header[version_o] = '0';
  header[version_o+1] = '0';
  }

// Stores in buf[0..size-1] the 'size' least significant octal digits of
// 'num' as ASCII, most significant digit first, padded with '0'.
// No terminator is written. Does nothing if size <= 0.
inline void print_octal( uint8_t * const buf, int size, unsigned long long num )
  {
  for( int i = size - 1; i >= 0; --i )
    {
    buf[i] = '0' + ( num & 7 );		// lowest octal digit
    num >>= 3;
    }
  }


// Returns "size" rounded up to the next multiple of header size (512).
// A "size" that is already a multiple is returned unchanged.
//
inline unsigned long long round_up( const unsigned long long size )
  {
  const unsigned long long excess = size % header_size;
  if( excess == 0 ) return size;
  return size + ( header_size - excess );
  }


// Returns true if ".." at position 'i' of 'filename' is a whole path
// component: it starts the name or follows a '/', and it is followed by
// a '/' or by the end of the string.
inline bool dotdot_at_i( const char * const filename, const int i )
  {
  if( filename[i] != '.' || filename[i+1] != '.' ) return false;
  if( i != 0 && filename[i-1] != '/' ) return false;
  const char next = filename[i+2];
  return ( next == 0 || next == '/' );
  }


// Heap buffer of chars obtained with malloc that can grow but never
// shrinks. The memory is released by the destructor.
// Objects can't be copied: a copy would hold the same pointer and both
// destructors would free it.
class Resizable_buffer
  {
  char * p;				// 0 if the initial malloc failed
  unsigned long size_;			// size_ < LONG_MAX

  // declared private and not implemented to forbid copies
  Resizable_buffer( const Resizable_buffer & );
  Resizable_buffer & operator=( const Resizable_buffer & );

public:
  // must be >= 87 for format_member_name
  enum { default_initial_size = 2 * header_size };

  // Allocates 'initial_size' bytes. If the allocation fails the buffer
  // is left with a null pointer and a size of 0.
  explicit Resizable_buffer( const unsigned long initial_size =
                             default_initial_size )
    : p( (char *)std::malloc( initial_size ) ), size_( p ? initial_size : 0 ) {}
  ~Resizable_buffer() { if( p ) std::free( p ); p = 0; size_ = 0; }

  // Makes sure the buffer holds at least 'new_size' bytes.
  // Returns false if 'new_size' >= LONG_MAX or if realloc fails; in both
  // cases the buffer and its contents are left unchanged.
  // Growing the buffer may move it, changing the pointer returned by
  // operator().
  bool resize( const unsigned long long new_size )
    {
    if( new_size >= LONG_MAX ) return false;
    if( size_ < new_size )
      {
      char * const tmp = (char *)std::realloc( p, new_size );
      if( !tmp ) return false;
      p = tmp; size_ = new_size;
      }
    return true;
    }
  char * operator()() const { return p; }	// pointer to the buffer
  unsigned long size() const { return size_; }	// size in bytes
  };


class Extended		// holds the metadata read from/written to extended records
  {
  // the real metadata
  std::string linkpath_;
  std::string path_;
  long long file_size_;			// >= 0 && <= max_file_size

  // cached sizes; a negative full_size_ means they must be recalculated
  mutable long long edsize_;		// extended data size
  mutable long long padded_edsize_;	// edsize rounded up
  mutable long long full_size_;		// header + padded edsize
  mutable long long linkpath_recsize_;
  mutable long long path_recsize_;
  mutable int file_size_recsize_;

  // true if CRC present in parsed or formatted records
  mutable bool crc_present_;

  void calculate_sizes() const;

public:
  static const std::string crc_record;

  // Creates an empty object; all sizes are 0 and no CRC is present.
  Extended()
    : file_size_( 0 ),
      edsize_( 0 ),
      padded_edsize_( 0 ),
      full_size_( 0 ),
      linkpath_recsize_( 0 ),
      path_recsize_( 0 ),
      file_size_recsize_( 0 ),
      crc_present_( false ) {}

  // Returns the object to the state left by the constructor.
  void reset()
    {
    linkpath_.clear();
    path_.clear();
    file_size_ = 0;
    edsize_ = 0;
    padded_edsize_ = 0;
    full_size_ = 0;
    linkpath_recsize_ = 0;
    path_recsize_ = 0;
    file_size_recsize_ = 0;
    crc_present_ = false;
    }

  // True if no linkpath, no path and no file size are stored.
  bool empty() const
    {
    if( file_size_ != 0 ) return false;
    return ( linkpath_.empty() && path_.empty() );
    }

  const std::string & linkpath() const { return linkpath_; }
  const std::string & path() const { return path_; }
  long long file_size() const { return file_size_; }
  long long get_file_size_and_reset( const Tar_header header );

  // The setters mark the cached sizes as stale (full_size_ = -1).
  void linkpath( const char * const lp ) { linkpath_ = lp; full_size_ = -1; }
  void path( const char * const p ) { path_ = p; full_size_ = -1; }
  // A size outside of the range [0, max_file_size] is stored as 0.
  void file_size( const long long fs )
    {
    const bool in_range = ( fs >= 0 && fs <= max_file_size );
    file_size_ = in_range ? fs : 0;
    full_size_ = -1;
    }

  // Recalculates the cached sizes first if they are stale.
  long long full_size() const
    {
    if( full_size_ < 0 ) calculate_sizes();
    return full_size_;
    }

  bool crc_present() const { return crc_present_; }
  long long format_block( Resizable_buffer & rbuf ) const;
  bool parse( const char * const buf, const unsigned long long edsize,
              const bool permissive );
  void fill_from_ustar( const Tar_header header );
  };


// Limits of the dictionary size, the member size and the data size.
enum {
  min_dictionary_bits = 12,
  min_dictionary_size = 1 << min_dictionary_bits,	// 4 KiB
  max_dictionary_bits = 29,
  max_dictionary_size = 1 << max_dictionary_bits,	// 512 MiB
  min_member_size     = 36,
  min_data_size       = 2 * min_dictionary_size,
  max_data_size       = 2 * max_dictionary_size };


// Returns true if 'dictionary_size' is in the range
// [min_dictionary_size, max_dictionary_size].
inline bool isvalid_ds( const unsigned dictionary_size )
  {
  if( dictionary_size < min_dictionary_size ) return false;
  return ( dictionary_size <= max_dictionary_size );
  }


// The 4 magic bytes compared against the start of a Lzip_header.
const uint8_t lzip_magic[4] = { 0x4C, 0x5A, 0x49, 0x50 };	// "LZIP"

// Header of a lzip member (6 bytes).
struct Lzip_header
  {
  uint8_t data[6];			// 0-3 magic bytes
					//   4 version
					//   5 coded_dict_size
  enum { size = 6 };

  // True if the 4 magic bytes are all correct.
  bool verify_magic() const
    { return ( std::memcmp( data, lzip_magic, 4 ) == 0 ); }

  // True if sz > 0 and the first 'sz' bytes (4 at most) match the magic.
  bool verify_prefix( const int sz ) const	// detect (truncated) header
    {
    for( int i = 0; i < sz && i < 4; ++i )
      if( data[i] != lzip_magic[i] ) return false;
    return ( sz > 0 );
    }
  // True if 2 or 3 of the 4 magic bytes are correct.
  bool verify_corrupt() const			// detect corrupt header
    {
    int matches = 0;
    for( int i = 0; i < 4; ++i )
      if( data[i] == lzip_magic[i] ) ++matches;
    return ( matches > 1 && matches < 4 );
    }

  uint8_t version() const { return data[4]; }
  bool verify_version() const { return ( data[4] == 1 ); }

  // Decodes data[5]. The 5 low bits are the base 2 logarithm of the base
  // size. The 3 high bits are the number of sixteenths of the base size
  // to subtract from it (only if the base size > min_dictionary_size).
  // The result is not checked; see isvalid_ds.
  unsigned dictionary_size() const
    {
    // The exponent comes from the file and may be as large as 31, so the
    // shift is done on an unsigned operand to stay away from the sign bit.
    unsigned sz = ( 1U << ( data[5] & 0x1F ) );
    if( sz > min_dictionary_size )
      sz -= ( sz / 16 ) * ( ( data[5] >> 5 ) & 7 );
    return sz;
    }
  };


// Trailer of a lzip member (20 bytes). The three fields are stored with
// the least significant byte first.
struct Lzip_trailer
  {
  uint8_t data[20];	//  0-3  CRC32 of the uncompressed data
			//  4-11 size of the uncompressed data
			// 12-19 member size including header and trailer
  enum { size = 20 };

  // Value of bytes 0-3.
  unsigned data_crc() const
    {
    unsigned value = 0;
    for( int i = 3; i >= 0; --i ) value = ( value << 8 ) | data[i];
    return value;
    }

  // Value of bytes 4-11.
  unsigned long long data_size() const
    {
    unsigned long long value = 0;
    for( int i = 11; i >= 4; --i ) value = ( value << 8 ) | data[i];
    return value;
    }

  // Value of bytes 12-19.
  unsigned long long member_size() const
    {
    unsigned long long value = 0;
    for( int i = 19; i >= 12; --i ) value = ( value << 8 ) | data[i];
    return value;
    }

  // Returns false if the three fields can't belong to the same member:
  // exactly one of CRC and data size is 0, the member is smaller than
  // min_member_size, or one size is out of the limits derived from the
  // other.
  bool verify_consistency() const	// check internal consistency
    {
    const unsigned long long dsize = data_size();
    const bool no_crc = ( data_crc() == 0 );
    const bool no_data = ( dsize == 0 );
    if( no_crc != no_data ) return false;
    const unsigned long long msize = member_size();
    if( msize < min_member_size ) return false;
    // NOTE(review): the first comparison of each limit test presumably
    // skips the test when the limit has wrapped around -- confirm.
    const unsigned long long mlimit = ( 9 * dsize + 7 ) / 8 + min_member_size;
    if( mlimit > dsize && msize > mlimit ) return false;
    const unsigned long long dlimit = 7090 * ( msize - 26 ) - 1;
    return !( dlimit > msize && dsize > dlimit );
    }
  };


// Table driven CRC32 calculator. The table is built by the constructor
// for one of two reflected polynomials. The caller does the initial and
// final inversions of the crc (except in windowed_crc).
class CRC32
  {
  uint32_t data[256];		// Table of CRCs of all 8-bit messages.

public:
  // Builds the table for CRC32-C if 'castagnoli', else for IEEE 802.3.
  CRC32( const bool castagnoli = false )
    {
    const unsigned cpol = 0x82F63B78U;	// CRC32-C  Castagnoli polynomial.
    const unsigned ipol = 0xEDB88320U;	// IEEE 802.3 Ethernet polynomial.
    const unsigned poly = castagnoli ? cpol : ipol;

    for( unsigned n = 0; n < 256; ++n )
      {
      unsigned rem = n;
      for( int bit = 8; bit > 0; --bit )
        rem = ( rem & 1 ) ? ( poly ^ ( rem >> 1 ) ) : ( rem >> 1 );
      data[n] = rem;
      }
    }

  // Updates 'crc' with one byte.
  void update_byte( uint32_t & crc, const uint8_t byte ) const
    {
    const uint32_t index = ( crc ^ byte ) & 0xFF;
    crc = data[index] ^ ( crc >> 8 );
    }

  // Updates 'crc' with the first 'size' bytes of 'buffer'.
  // Does nothing if size <= 0.
  void update_buf( uint32_t & crc, const uint8_t * const buffer,
                   const int size ) const
    {
    uint32_t c = crc;			// work on a local copy
    for( int i = 0; i < size; ++i ) update_byte( c, buffer[i] );
    crc = c;
    }

  // Calculates the crc of size bytes except a window of 8 bytes at pos
  uint32_t windowed_crc( const uint8_t * const buffer, const int pos,
                         const int size ) const
    {
    const uint8_t * const tail = buffer + pos + 8;	// first byte after window
    uint32_t crc = 0xFFFFFFFFU;
    update_buf( crc, buffer, pos );
    update_buf( crc, tail, size - pos - 8 );
    return crc ^ 0xFFFFFFFFU;
    }
  };

// CRC32 object defined in another file.
// NOTE(review): the name suggests it is built with castagnoli = true -- confirm at the definition.
extern const CRC32 crc32c;

// Texts of the diagnostic messages.
// messages about the lzip format
const char * const bad_magic_msg = "Bad magic number (file not in lzip format).";
const char * const bad_dict_msg = "Invalid dictionary size in member header.";
const char * const corrupt_mm_msg = "Corrupt header in multimember file.";
const char * const trailing_msg = "Trailing data not allowed.";
// messages about tar headers and extended records
const char * const bad_hdr_msg = "Corrupt or invalid tar header.";
const char * const gblrec_msg = "Error in global extended records.";
const char * const extrec_msg = "Error in extended records.";
const char * const mcrc_msg = "Missing CRC in extended records.";
const char * const end_msg = "Archive ends unexpectedly.";
// messages about memory exhaustion
const char * const mem_msg = "Not enough memory.";
const char * const mem_msg2 = "Not enough memory. Try a lower compression level.";
// messages about format violations
const char * const fv_msg1 = "Format violation: extended header followed by EOF blocks.";
const char * const fv_msg2 = "Format violation: extended header followed by global header.";
const char * const fv_msg3 = "Format violation: consecutive extended headers found.";
// messages about archives that are not recognized
const char * const posix_msg = "This does not look like a POSIX tar archive.";
const char * const posix_lz_msg = "This does not look like a POSIX tar.lz archive.";

// defined in common.cc
// Operations on pthread mutexes and condition variables. They return void.
// NOTE(review): presumably they do not return if the pthread call fails -- confirm in common.cc.
void xinit_mutex( pthread_mutex_t * const mutex );
void xinit_cond( pthread_cond_t * const cond );
void xdestroy_mutex( pthread_mutex_t * const mutex );
void xdestroy_cond( pthread_cond_t * const cond );
void xlock( pthread_mutex_t * const mutex );
void xunlock( pthread_mutex_t * const mutex );
void xwait( pthread_cond_t * const cond, pthread_mutex_t * const mutex );
void xsignal( pthread_cond_t * const cond );
void xbroadcast( pthread_cond_t * const cond );
// NOTE(review): presumably the inverse of print_octal for a field of 'size' bytes -- confirm.
unsigned long long parse_octal( const uint8_t * const ptr, const int size );
// NOTE(review): presumably they return the number of bytes read/written -- confirm.
int readblock( const int fd, uint8_t * const buf, const int size );
int writeblock( const int fd, const uint8_t * const buf, const int size );

// defined in common_decode.cc
class Arg_parser;	// forward declaration; only used by reference here
// NOTE(review): presumably true if the 'size' bytes of 'buf' are all zero -- confirm.
bool block_is_zero( const uint8_t * const buf, const int size );
// Needs a 'rbuf' of at least 87 bytes (see Resizable_buffer::default_initial_size).
bool format_member_name( const Extended & extended, const Tar_header header,
                         Resizable_buffer & rbuf, const bool long_format );
bool show_member_name( const Extended & extended, const Tar_header header,
                       const int vlevel, Resizable_buffer & rbuf );
bool check_skip_filename( const Arg_parser & parser,
                          std::vector< char > & name_pending,
                          const char * const filename, const int filenames );

// defined in create.cc
enum Solidity { no_solid, bsolid, dsolid, asolid, solid };
// NOTE(review): the cl_ variables are presumably set from the command line -- confirm.
extern int cl_owner;
extern int cl_group;
extern int cl_data_size;
extern Solidity solidity;
// NOTE(review): max_size = -1 presumably means "no limit" -- confirm.
bool copy_file( const int infd, const int outfd, const long long max_size = -1 );
bool writeblock_wrapper( const int outfd, const uint8_t * const buffer,
                         const int size );
bool write_eof_records( const int outfd, const bool compressed );
const char * remove_leading_dotslash( const char * const filename,
                                      const bool dotdot = false );
bool fill_headers( const char * const filename, Extended & extended,
                   Tar_header header, long long & file_size, const int flag );
bool block_is_full( const Extended & extended,
                    const unsigned long long file_size,
                    unsigned long long & partial_data_size );
void set_error_status( const int retval );
int final_exit_status( int retval, const bool show_msg = true );
// checksum of a tar header and its verification
unsigned ustar_chksum( const uint8_t * const header );
bool verify_ustar_chksum( const uint8_t * const header );
bool has_lz_ext( const std::string & name );
int concatenate( const std::string & archive_name, const Arg_parser & parser,
                 const int filenames );
int encode( const std::string & archive_name, const Arg_parser & parser,
            const int filenames, const int level, const int num_workers,
            const int out_slots, const int debug_level, const bool append,
            const bool dereference );

// defined in create_lz.cc
// NOTE(review): judging by 'num_workers' and 'out_slots' this is presumably the multi-threaded counterpart of encode -- confirm.
int encode_lz( const char * const archive_namep, const Arg_parser & parser,
               const int dictionary_size, const int match_len_limit,
               const int num_workers, const int outfd, const int out_slots,
               const int debug_level, const bool dereference );

// defined in delete.cc
class Lzip_index;	// forward declaration; only used by reference here
// NOTE(review): presumably seeks 'fd' to 'pos' and returns false on failure -- confirm.
bool safe_seek( const int fd, const long long pos );
int tail_copy( const char * const archive_namep, const Arg_parser & parser,
               std::vector< char > & name_pending,
               const Lzip_index & lzip_index, const long long istream_pos,
               const int infd, const int outfd, int retval );
int delete_members( const std::string & archive_name, const Arg_parser & parser,
                    const int filenames, const bool missing_crc,
                    const bool permissive );

// defined in delete_lz.cc
// NOTE(review): presumably the variant of delete_members for archives described by a Lzip_index -- confirm.
int delete_members_lz( const char * const archive_namep,
                       const Arg_parser & parser,
                       std::vector< char > & name_pending,
                       const Lzip_index & lzip_index,
                       const int filenames, const int infd, const int outfd,
                       const bool missing_crc, const bool permissive );

// defined in exclude.cc
namespace Exclude {
// NOTE(review): presumably 'excluded' tests 'filename' against the patterns registered with 'add_pattern' -- confirm.
void add_pattern( const std::string & arg );
bool excluded( const char * const filename );
} // end namespace Exclude

// defined in extract.cc
// Operation modes of the program; m_none is the first value (0).
enum Program_mode { m_none, m_append, m_concatenate, m_create, m_delete,
                    m_diff, m_extract, m_list };
int decode( const std::string & archive_name, const Arg_parser & parser,
            const int filenames, const int num_workers, const int debug_level,
            const Program_mode program_mode, const bool ignore_ids,
            const bool keep_damaged, const bool missing_crc,
            const bool permissive );

// defined in list_lz.cc
struct LZ_Decoder;	// forward declaration; only used through pointers here
// The functions taking 'msg' receive a pointer to a message pointer.
// NOTE(review): presumably it is set to one of the message strings on error -- confirm.
int archive_read_lz( LZ_Decoder * const decoder, const int infd,
                     long long & file_pos, const long long member_end,
                     const long long cdata_size, uint8_t * const buf,
                     const int size, const char ** msg );
int parse_records_lz( LZ_Decoder * const decoder, const int infd,
                      long long & file_pos, const long long member_end,
                      const long long cdata_size, long long & data_pos,
                      Extended & extended, const Tar_header header,
                      Resizable_buffer & rbuf, const char ** msg,
                      const bool permissive );
int skip_member_lz( LZ_Decoder * const decoder, const int infd,
                    long long & file_pos, const long long member_end,
                    const long long cdata_size, long long & data_pos,
                    long long rest, const char ** msg );
int list_lz( const char * const archive_namep, const Arg_parser & parser,
             std::vector< char > & name_pending, const Lzip_index & lzip_index,
             const int filenames, const int debug_level, const int infd,
             const int num_workers, const bool missing_crc,
             const bool permissive );

// defined in lzip_index.cc
// NOTE(review): presumably reads 'size' bytes from 'fd' at offset 'pos' and returns the number of bytes read -- confirm.
int seek_read( const int fd, uint8_t * const buf, const int size,
               const long long pos );

// defined in main.cc
extern int verbosity;
struct stat;		// forward declaration; only used through a pointer here
int hstat( const char * const filename, struct stat * const st );
// NOTE(review): the open functions presumably return a file descriptor, or a negative value on error -- confirm.
int open_instream( const std::string & name );
int open_outstream( const std::string & name, const bool create = true );
void cleanup_and_fail( const int retval = 1 );	// terminate the program
// Error reporting. NOTE(review): 'errcode' is presumably an errno value, with 0 meaning none -- confirm.
void show_error( const char * const msg, const int errcode = 0,
                 const bool help = false );
void show_file_error( const char * const filename, const char * const msg,
                      const int errcode = 0 );
void internal_error( const char * const msg );
