1
\$\begingroup\$

Intro

(See the version 1.0.0.)

(See the GitHub repository.)

This time I have got rid of some generic classes and adapted the entire code base to deal with byte alphabet.


Code


package io.github.coderodde.compressor.app;

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;

/**
 * This class implements a Huffman compressor for binary (byte-wise) data.
 * 
 * @author Rodion "rodde" Efremov
 * @version 1.0.0 (Nov 14, 2025)
 * @since 1.0.0 (Nov 14, 2025))
 */
public final class App {
    
    /**
     * The exit status for abnormal termination.
     */
    private static final int EXIT_FAILURE = 1;
    
    /**
     * This extension is added to the compressed files.
     */
    private static final String COMPRESSED_FILE_EXTENSION = ".huf";

    public static void main(String[] args) {
        
        try {
            if (args.length == 1) {
                compressFile(args[0]);
            } else if (args.length == 2) {
                decompressFile(args[0], args[1]);
            } else {
                printUsage();
            }
        } catch (final Exception ex) {
            error(ex.getMessage());
            System.exit(EXIT_FAILURE);
        }
    }
        
    private static void printUsage() {
        final String jarName = getJarFileName();
        
        System.out.printf(
                String.format(
                        "Usage: %s FILE - to compress FILE into FILE.huf\n", 
                        jarName));
        
        System.out.printf(
                String.format(
                        "       %s FILE.huf OUTPUT_FILE - " + 
                        "to decompress FILE.huf into OUTPUT_FILE\n", 
                        jarName));
    }
    
    private static void compressFile(final String inputFileName) throws IOException {
        final File inputFile = new File(inputFileName);
        
        if (!inputFile.exists()) {
            error(String.format("The input file '%s' does not exist.\n", 
                                inputFileName));
            
            System.exit(EXIT_FAILURE);
        }
        
        if (inputFileName.endsWith(COMPRESSED_FILE_EXTENSION)) {
            error(String.format("Input file '%s' already seems to  be compressed.\n", 
                                inputFileName));
            
            System.exit(EXIT_FAILURE);
        }
        
        final Path path = inputFile.toPath();
        
        long ta = System.currentTimeMillis();
        final byte[] rawData = Files.readAllBytes(path);
        long tb = System.currentTimeMillis();
        
        info(String.format(
                "Read all file bytes in %d milliseconds.\n", tb - ta));
        
        ta = System.currentTimeMillis();
        final byte[] compressedData = HuffmanByteCompressor.compress(rawData);
        tb = System.currentTimeMillis();
        
        info(String.format(
                "Compressed the data in %d milliseconds.\n", tb - ta));
        
        final File outputFile = 
                new File(inputFileName + COMPRESSED_FILE_EXTENSION);
        
        ta = System.currentTimeMillis();
        Files.write(outputFile.toPath(), 
                    compressedData);
        tb = System.currentTimeMillis();
        
        info(String.format(
                "Written the compressed data in %d milliseconds.\n", tb - ta));
    }
    
    private static void decompressFile(final String compressedFileName,
                                       final String outputFileName) throws IOException {
        
        final File compressedFile = new File(compressedFileName);
        final File outputFile = new File(outputFileName);
        
        if (!compressedFile.exists()) {
            error(String.format("Compressed file '%s' does not exist.\n", 
                                compressedFileName));
            
            System.exit(EXIT_FAILURE);
        }
        
        if (!outputFile.exists()) {
            outputFile.createNewFile();
        }
        
        final Path compressedFilePath = compressedFile.toPath();
        
        long ta = System.currentTimeMillis();
        final byte[] compressedData = Files.readAllBytes(compressedFilePath);
        long tb = System.currentTimeMillis();
        
        info(String.format(
                "Read the compressed data in %d milliseconds.\n", tb - ta));
        
        final Path outputFilePath = outputFile.toPath();
        
        ta = System.currentTimeMillis();
        final byte[] originalData =
                HuffmanByteDecompressor.decompress(compressedData);
        tb = System.currentTimeMillis();
        
        info(String.format(
                "Decompressed the data in %d milliseconds.\n", tb - ta));
        
        ta = System.currentTimeMillis();
        Files.write(outputFilePath, originalData);
        tb = System.currentTimeMillis();
        
        info(String.format(
                "Written the decompressed data in %d milliseconds.\n",
                tb - ta));
    }
    
    private static void error(final String message) {
        System.err.printf("[ERROR] %s", message);
    }
    
    private static void info(final String message) {
        System.out.printf("[INFO] %s", message);
    }
    
    /**
     * Obtains the current name of this JAR-file.
     * 
     * @return the name of this JAR-file.
     */
    private static String getJarFileName() {
        return new File(App.class.getProtectionDomain().getCodeSource()
                                                       .getLocation()
                                                       .getPath())
                                                       .getName();
    }
}

package io.github.coderodde.compressor.app;

import java.util.Objects;

/**
 * This class is responsible for decompressing the actual compressed data to the
 * compression source data.
 * 
 * @author Rodion "rodde" Efremov
 * @version 1.0.0 (Nov 17, 2025)
 * @since 1.0.0 (Nov 17, 2025)
 */
public final class ByteArrayCompressedDataReader {

    /**
     * The resultant decompressed data.
     */
    private final byte[] outputRawData;
    
    /**
     * The input compressed data.
     */
    private final byte[] inputCompressedData;
    
    /**
     * The index of the bit where compressed data begins.
     */
    private final long startingBitIndex;
    
    /**
     * The (Huffman) decoder tree.
     */
    private final ByteHuffmanDecoderTree<Byte> decoderTree;
    
    /**
     * Constructs this compressed data reader/decompressor.
     * 
     * @param outputRawData       the resultant decompressed data.
     * @param inputCompressedData the input compressed data.
     * @param startingBitIndex    the index of the first bit to decompress right
     *                            after the header.
     * @param decoderTree         the decoder tree.
     */
    public ByteArrayCompressedDataReader(final byte[] outputRawData,
                                         final byte[] inputCompressedData,
                                         final long startingBitIndex,
                                         final ByteHuffmanDecoderTree<Byte> 
                                                 decoderTree) {
        
        this.outputRawData = 
                Objects.requireNonNull(
                        outputRawData, 
                        "The output raw data is null");
        
        this.inputCompressedData = 
                Objects.requireNonNull(
                        inputCompressedData,
                        "The input compressed data is null");
        
        this.decoderTree =
                Objects.requireNonNull(
                        decoderTree, 
                        "The input decoder tree is null");
        
        this.startingBitIndex = startingBitIndex;
    }
    
    /**
     * Decompresses and reads the compressed data.
     */
    public void read() {
        final int totalBytes = outputRawData.length;
        long currentBitIndex = startingBitIndex;
        
        for (int byteIndex = 0; 
                 byteIndex != totalBytes;
                 byteIndex++) {
            
            final Byte dataByte = decoderTree.decode(inputCompressedData,
                                                     currentBitIndex);
            outputRawData[byteIndex] = dataByte;
            currentBitIndex += decoderTree.getPreviousCodeLength();
        }
    }
}

package io.github.coderodde.compressor.app;

import java.util.Objects;

/**
 * This class is responsible for writing the actual compressed data to a byte 
 * array. This class does not handle the compression header; it is handled by
 * {@link io.github.coderodde.compressor.app.ByteArrayHeaderWriter}.
 * 
 * @author Rodion "rodde" Efremov
 * @version 1.0.0 (Nov 17, 2025)
 * @since 1.0.0 (Nov 17, 2025)
 */
public final class ByteArrayCompressedDataWriter {

    /**
     * The target byte array to which all the compressed data will end up.
     */
    private final byte[] compressedOutputData;
    
    /**
     * The actual data to be compressed.
     */
    private final byte[] inputRawData;
    
    /**
     * The index of the first bit in the compressed data. We need this in order
     * to omit the compression header.
     */
    private final long startingBitIndex;
    
    /**
     * The actual (Huffman) code table.
     */
    private final ByteHuffmanCodeTable codeTable;
    
    /**
     * Constructs this writer.
     * 
     * @param compressedOutputData the compressed data byte array.
     * @param inputRawData         the input raw data byte array.
     * @param startingBitIndex     the starting bit index for the writing.
     * @param codeTable            the byte encoding table.
     */
    public ByteArrayCompressedDataWriter(
            final byte[] compressedOutputData,
            final byte[] inputRawData,
            final long startingBitIndex,
            final ByteHuffmanCodeTable codeTable) {
        
        this.compressedOutputData = 
                Objects.requireNonNull(
                        compressedOutputData,
                        "The output compressed data is null");
        
        this.inputRawData =
                Objects.requireNonNull(
                        inputRawData, 
                        "The input raw data is null");
        
        this.codeTable = 
                Objects.requireNonNull(
                        codeTable, 
                        "The input code table is null");
        
        this.startingBitIndex = startingBitIndex;
    }
    
    /**
     * Writes the entire compressed data of {@code inputRawData}.
     */
    public void write() {
        long currentBitIndex = startingBitIndex;
        
        for (final byte b : inputRawData) {
            final CodeWord codeword  = codeTable.get(b).reverse();
            final int codewordLength = codeword.length();
            
            writeCodeWord(compressedOutputData,
                          currentBitIndex,
                          codeword);
            
            currentBitIndex += codewordLength;
        }
    }
    
    /**
     * Writes a single codeword to the compressed output data byte array.
     * 
     * @param compressedOutputData the compressed output data byte array.
     * @param currentBitIndex      the current bit index.
     * @param codeword             the codeword to write.
     */
    private static void writeCodeWord(final byte[] compressedOutputData,
                                      final long currentBitIndex,
                                      final CodeWord codeword) {
        
        int byteIndex = (int) (currentBitIndex / Byte.SIZE); 
        int bitIndex  = (int) (currentBitIndex % Byte.SIZE);
        
        final int codewordLength = codeword.length();
        
        for (int codewordBitIndex = 0;
                 codewordBitIndex < codewordLength;
                 codewordBitIndex++) {
            
            if (codeword.get(codewordBitIndex)) {
                setBit(compressedOutputData,
                       byteIndex,
                       bitIndex);
            }
            
            bitIndex++;
            
            if (bitIndex == Byte.SIZE) {
                bitIndex = 0;
                byteIndex++;
            }
        }
    }
    
    /**
     * Sets the {@code bitIndex}th bit in 
     * {@code compressedOutputData[byteIndex]}.
     * 
     * @param compressedOutputData the compressed output data byte array.
     * @param byteIndex            the target byte index.
     * @param bitIndex             the target bit index.
     */
    private static void setBit(final byte[] compressedOutputData,
                               final int byteIndex,
                               final int bitIndex) {
        
        final byte mask = (byte)(1 << bitIndex);
        compressedOutputData[byteIndex] |= mask;
    }
}

package io.github.coderodde.compressor.app;

import static io.github.coderodde.compressor.app.Configuration.BYTES_PER_CODEWORD_MAX;
import static io.github.coderodde.compressor.app.Configuration.BYTES_PER_CODE_SIZE;
import static io.github.coderodde.compressor.app.Configuration.BYTES_PER_RAW_DATA_LENGTH;
import java.nio.ByteBuffer;
import java.util.Objects;
import java.nio.ByteOrder;
import java.util.Arrays;

/**
 * This class implements the reader returning the file header data such as the 
 * length of the raw data being compressed and its decoding Huffman tree.
 * 
 * @author Rodion "rodde" Efremov
 * @version 1.0.0 (Nov 16, 2025)
 * @since 1.0.0 (Nov 16, 2025)
 */
public final class ByteArrayHeaderReader {

    /**
     * The compressed data byte array containing the header.
     */
    private final byte[] compressedData;
    
    /**
     * We cache this in order users of this class can query the length of the 
     * raw data that would result from decompression.
     */
    private final int rawDataLength;
    
    /**
     * The code table being read.
     */
    private final ByteHuffmanCodeTable codeTable;
    
    public ByteArrayHeaderReader(final byte[] compressedData) {
        this.compressedData =
                Objects.requireNonNull(compressedData,
                                       "The input compressed data is null");
        
        final int codeTableSize = readCodeTableSize();
        this.rawDataLength = readRawDataLength();
        this.codeTable = readCodeTable(codeTableSize);
    }
    
    public int getRawDataLength() {
        return rawDataLength;
    }
    
    public ByteHuffmanCodeTable getCodeTable() {
        return codeTable;
    }
    
    private int readCodeTableSize() {
        final byte[] codeTableSizeBytes = new byte[BYTES_PER_CODE_SIZE];
        
        System.arraycopy(compressedData,
                         0,
                         codeTableSizeBytes, 
                         0, 
                         codeTableSizeBytes.length);
        
        final ByteBuffer byteBuffer = ByteBuffer.wrap(codeTableSizeBytes);
        return byteBuffer.order(ByteOrder.LITTLE_ENDIAN).getInt();
    }
    
    private int readRawDataLength() {
        final byte[] rawDataLengthBytes = new byte[BYTES_PER_RAW_DATA_LENGTH];
        
        System.arraycopy(compressedData,
                         BYTES_PER_CODE_SIZE,
                         rawDataLengthBytes, 
                         0, 
                         rawDataLengthBytes.length);
        
        final ByteBuffer byteBuffer = ByteBuffer.wrap(rawDataLengthBytes);
        return byteBuffer.order(ByteOrder.LITTLE_ENDIAN).getInt();
    }
    
    private ByteHuffmanCodeTable readCodeTable(final int codeTableSize) {
        final ByteHuffmanCodeTable codeTable = new ByteHuffmanCodeTable();
        final int codeEntryLength = Utils.getCodeEntryLength();
        
        int byteCursor = BYTES_PER_CODE_SIZE + BYTES_PER_RAW_DATA_LENGTH;
        
        for (int codeIndex = 0; codeIndex < codeTableSize; ++codeIndex) {
            readCodeEntry(codeTable,
                          compressedData,
                          byteCursor);
            
            byteCursor += codeEntryLength;
        }
        
        return codeTable;
    }
    
    private static void readCodeEntry(final ByteHuffmanCodeTable codeTable,
                                      final byte[] compressedData,
                                      final int byteCursor) {
        final byte value  = compressedData[byteCursor];
        final byte length = compressedData[byteCursor + 1];
        final byte[] codeEntryData = 
                Arrays.copyOfRange(compressedData, 
                                   byteCursor + 2, 
                                   byteCursor + 2 + BYTES_PER_CODEWORD_MAX);
        
        final CodeWord codeword = inferCodeWord(length, codeEntryData);
        
        codeTable.put(value, codeword);
    }
    
    private static CodeWord inferCodeWord(final int length,
                                          final byte[] codeData) {
        final int bits = ByteBuffer.wrap(codeData)
                                   .order(ByteOrder.LITTLE_ENDIAN)
                                   .getInt();
        
        final CodeWord codeword = new CodeWord(length);
        
        int mask = 1;
        
        for (int i = 0; i < length; ++i) {
            if ((bits & mask) != 0) {
                codeword.set(i);
            }
            
            mask <<= 1;
        }
        
        return codeword;
    }
}

package io.github.coderodde.compressor.app;

import static io.github.coderodde.compressor.app.Configuration.BYTES_PER_CODEWORD_MAX;
import static io.github.coderodde.compressor.app.Configuration.BYTES_PER_CODE_SIZE;
import static io.github.coderodde.compressor.app.Configuration.BYTES_PER_RAW_DATA_LENGTH;
import static io.github.coderodde.compressor.app.Configuration.CODE_TABLE_CAPACITY;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.Objects;

/**
 * This class writes the file header to the compressed file.
 * 
 * @author Rodion "rodde" Efremov
 * @version 1.0.0 (Nov 16, 2025)
 * @since 1.0.0 (Nov 16, 2025)
 */
public final class ByteArrayHeaderWriter {

    /**
     * The minimum length of the raw data byte array.
     */
    private static final int MINIMUM_RAW_DATA_LENGTH = 1;
    
    /**
     * The length of the raw data in bytes.
     */
    private final int rawDataLength;
    
    /**
     * The output data containing the compressed file.
     */
    private final byte[] outputData;
    
    /**
     * The index of the bit in the compressed data byte array at which writing
     * compressed data must begin.
     */
    private long dataStartBitIndex;
    
    /**
     * The code table to write to the compressed file header.
     */
    private final ByteHuffmanCodeTable codeTable;
    
    public ByteArrayHeaderWriter(final int rawDataLength,
                                 final byte[] outputData,
                                 final ByteHuffmanCodeTable codeTable) {
        
        checkRawDataLength(rawDataLength);
        Objects.requireNonNull(outputData, "The output data array is null");
        Objects.requireNonNull(codeTable, "The input code table is null");
        checkCodeTable(codeTable);
        
        this.rawDataLength = rawDataLength;
        this.outputData    = outputData;
        this.codeTable     = codeTable;
    }
    
    public void write() {
        writeCodeSize();
        writeRawDataLength();
        writeCodeTable();
    }
    
    public long getDataStartBitIndex() {
        return dataStartBitIndex;
    }
    
    /**
     * Writes the code size to the very first 32-bit integer of the compressed
     * file.
     */
    private void writeCodeSize() {
        final byte[] codeSizeBytes = 
                ByteBuffer.allocate(BYTES_PER_CODE_SIZE)
                          .order(ByteOrder.LITTLE_ENDIAN)
                          .putInt(codeTable.size())
                          .array();
        
        System.arraycopy(codeSizeBytes, 
                         0, 
                         outputData, 
                         0, 
                         codeSizeBytes.length);
    }
    
    /**
     * Writes the length of the raw data file to the second 32-bit word in the 
     * compressed file.
     */
    private void writeRawDataLength() {
        final byte[] rawDataLengthBytes = 
                ByteBuffer.allocate(BYTES_PER_RAW_DATA_LENGTH)
                          .order(ByteOrder.LITTLE_ENDIAN)
                          .putInt(rawDataLength)
                          .array();
        
        System.arraycopy(rawDataLengthBytes,
                         0,
                         outputData, 
                         BYTES_PER_CODE_SIZE, 
                         rawDataLengthBytes.length);
    }
    
    /**
     * Writes the actual code table to the compressed file header to starting 
     * from the 8th byte.
     */
    private void writeCodeTable() {
        int currentByteIndex = BYTES_PER_CODE_SIZE 
                             + BYTES_PER_RAW_DATA_LENGTH;
        
        for (int intValue = 0; 
                 intValue < CODE_TABLE_CAPACITY; 
                 intValue++) {
        
            // Extract the least significant byte from 'intValue':
            final byte value = (byte)(intValue & 0xff);
            final CodeWord codeword = codeTable.get(value);
            
            if (codeword != null) {
                outputData[currentByteIndex++] = value;
                outputData[currentByteIndex++] = (byte) codeword.length();
                
                final byte[] codewordBytes = codeword.toByteArray();

                System.arraycopy(codewordBytes, 
                                 0, 
                                 outputData, 
                                 currentByteIndex, 
                                 codewordBytes.length);

                currentByteIndex += BYTES_PER_CODEWORD_MAX;
            }
        }
        
        this.dataStartBitIndex = currentByteIndex * Byte.SIZE;
    }
    
    private static void checkRawDataLength(final int rawDataLength) {
        if (rawDataLength < MINIMUM_RAW_DATA_LENGTH) {
            throw new TooShortRawDataLengthException(
                    String.format(
                            "The length of the raw data is too small: %d. " + 
                            "Must be at least %d.", 
                            rawDataLength, 
                            MINIMUM_RAW_DATA_LENGTH));
        }
    }
    
    private static void checkCodeTable(final ByteHuffmanCodeTable codeTable) {
        if (codeTable.isEmpty()) {
            throw new EmptyCodeTableException();
        }
    }
}

package io.github.coderodde.compressor.app;

import static io.github.coderodde.compressor.app.Configuration.CODE_TABLE_CAPACITY;

/**
 * This class models the byte frequency distributions.
 * 
 * @author Rodion "rodde" Efremov
 * @version 1.0.0 (Nov 13, 2025)
 * @since 1.0.0 (Nov 13, 2025)
 */
public final class ByteFrequencyDistribution {

    /**
     * The actual table mapping bytes to their frequencies.
     */
    private final long[] frequencies = new long[CODE_TABLE_CAPACITY]; 
    
    /**
     * Returns the number of symbol/weight mappings.
     * 
     * @return the size of this distribution. 
     */
    public int size() {
        int sz = 0;
        
        for (final long b : frequencies) {
            if (b != 0) {
                ++sz;
            }
        }
        
        return sz;
    }
    
    /**
     * Returns {@code true} if and only if this distribution is empty.
     * 
     * @return a Boolean flag indicating whether this distribution is empty.
     */
    public boolean isEmpty() {
        return size() == 0;
    }
    
    public void incrementFrequency(final byte value) {
        ++frequencies[Byte.toUnsignedInt(value)];
    }
    
    public long getFrequency(final byte value) {
        return frequencies[Byte.toUnsignedInt(value)];
    }
}

package io.github.coderodde.compressor.app;

import static io.github.coderodde.compressor.app.Configuration.CODE_TABLE_CAPACITY;

/**
 * This class implements a Huffman code table over byte (8-bit) alphabet.
 * 
 * @author Rodion "rodde" Efremov
 * @version 1.0.0 (Nov 20, 2025)
 * @since 1.0.0 (Nov 20, 2025)
 */
public final class ByteHuffmanCodeTable {
    
    /**
     * The actual codeword table.
     */
    private final CodeWord[] table = new CodeWord[CODE_TABLE_CAPACITY];
    
    /**
     * The number of byte/codeword mappings in this code table.
    */
    private int size;
    
    /**
     * Associates the input byte {@code value} with the codeword 
     * {@code codeword}.
     * 
     * @param value    the byte key.
     * @param codeword the value codeword.
     */
    public void put(final byte value, final CodeWord codeword) {
        table[Byte.toUnsignedInt(value)] = codeword;
        ++size;
    }
    
    /**
     * Accesses the codeword associated with the byte value {@code value}.
     * 
     * @param value the byte key.
     * @return the associated codeword.
     */
    public CodeWord get(final byte value) {
        return table[Byte.toUnsignedInt(value)];
    }
    
    /**
     * Returns the number of byte/codeword mappings in this code table.
     * 
     * @return the size of this code table.
     */
    public int size() {
        return size;
    }
    
    /**
     * Returns {@code true} if and only if this code table is empty.
     * 
     * @return {@code true} if empty, {@code false} otherwise.
     */
    public boolean isEmpty() {
        return size == 0;
    }
    
    @Override
    public boolean equals(final Object object) {
        if (object == null) {
            return false;
        }
        
        if (!getClass().equals(object.getClass())) {
            return false;
        }
        
        if (object == this) {
            return true;
        }
        
        final ByteHuffmanCodeTable other = (ByteHuffmanCodeTable) object;
        
        if (size != other.size()) {
            return false;
        }
        
        for (int i = 0; i < CODE_TABLE_CAPACITY; ++i) {
            final CodeWord cw1 = get((byte) i);
            final CodeWord cw2 = other.get((byte) i);
            
            if (cw1 == null && cw2 == null) {
                continue;
            }
            
            if (cw1 != null && cw2 == null) {
                return false;
            }
            
            if (cw1 == null && cw2 != null) {
                return false;
            }
            
            // Here, both cw1 and cw2 are not null:
            if (!cw1.equals(cw2)) {
                return false;
            }
        }
        
        return true;
    }
}

package io.github.coderodde.compressor.app;

import static io.github.coderodde.compressor.app.Configuration.CODE_TABLE_CAPACITY;
import java.util.HashSet;
import java.util.Objects;
import java.util.PriorityQueue;
import java.util.Queue;
import java.util.Set;

/**
 * This class implements the Huffman code builder over weight distributions.
 * 
 * @author Rodion "rodde" Efremov
 * @version 2.0.1 (Nov 14, 2025)
 * @since 1.0.0 (Nov 12, 2025)
 */
public final class ByteHuffmanCodeTableBuilder {
    
    private ByteHuffmanCodeTableBuilder() {
        // Hide constructor.
    }
    
    public static ByteHuffmanCodeTable
        buildCode(final ByteFrequencyDistribution byteFrequencyDistribution) {
            
        Objects.requireNonNull(byteFrequencyDistribution,
                               "The input byte frequency distribution is null");
        
        if (byteFrequencyDistribution.isEmpty()) {
            throw new IllegalArgumentException(
                    "The input byte frequency distribution is empty");
        }
        
        final ByteHuffmanCodeTable codeTable = new ByteHuffmanCodeTable();
        final Queue<WeightedByteSet> queue   = new PriorityQueue<>();
        
        for (int i = 0; i < CODE_TABLE_CAPACITY; ++i) {
            // Grab the 8 least significant bits of 'i':
            final byte value = (byte)(i & 0xff);
            final long frequency =
                    byteFrequencyDistribution.getFrequency(value);
            
            if (frequency > 0L) {
                // Once here, value is present in the compressed data:
                final Set<Byte> set = new HashSet<>();
                set.add(value);
                codeTable.put(value, new CodeWord(0));
                queue.add(new WeightedByteSet(set, frequency));
            }
        }
        
        while (queue.size() > 1) {
            final WeightedByteSet entry1 = queue.remove();
            final WeightedByteSet entry2 = queue.remove();
            
            for (final byte value : entry1.getSet()) {
                codeTable.get(value).prependBit(true);
            }
            
            for (final byte value : entry2.getSet()) {
                codeTable.get(value).prependBit(false);
            }
            
            entry1.getSet().addAll(entry2.getSet());
            
            queue.add(new WeightedByteSet(
                            entry1.getSet(),
                            entry1.getTotalWeight() + 
                            entry2.getTotalWeight()));
        }
        
        return codeTable;
    }
}

package io.github.coderodde.compressor.app;

import java.util.Objects;

/**
 * This class implements the Huffman decoding tree.
 * 
 * @author Rodion "rodde" Efremov
 * @param <S> the alphabet symbol type.
 * @version 1.0.0 (Nov 14, 2025)
 * @since 1.0.0 (Nov 14, 2025)
 */
public final class ByteHuffmanDecoderTree<S> {
    
    /**
     * This static inner class implements the Huffman decoding tree node.
     * 
     * @param <S> the alphabet symbol type. 
     */
    private static final class TreeNode {
        Byte value;
        TreeNode zeroChild;
        TreeNode oneChild;
    }
    
    /**
     * The root node of the tree.
     */
    private final TreeNode root = new TreeNode();
    
    /**
     * Caches the code length of the previously decoded symbol.
     */
    private long previousCodeLength = -1L;
    
    /**
     * Constructs this Huffman decoding tree.
     * 
     * @param codeTable the code table for which to construct the decoding tree. 
     */
    public ByteHuffmanDecoderTree(final ByteHuffmanCodeTable codeTable) {
        Objects.requireNonNull(codeTable, "The input code table is null");
        
        for (int value = 0; 
                 value < Configuration.CODE_TABLE_CAPACITY;
                 value++) {
            final CodeWord codeword = codeTable.get((byte) value);
            
            if (codeword != null) {
                final Byte byteValue = Byte.valueOf((byte) value);
                insert(byteValue, codeword);
            }
        }
    }
    
    /**
     * Decodes a codeword to a symbol.
     * 
     * @param compressedData the compressed data for decompression.
     * @param bitIndex       the index of the starting bit of a codeword to 
     *                       scan.
     * @return the encoded symbol.
     */
    public byte decode(final byte[] compressedData, long bitIndex) {
        Objects.requireNonNull(compressedData, "The input raw data is null");
        
        previousCodeLength = 0;
        
        TreeNode node = root;
        
        while (node.value == null) {
            final boolean bit = readBit(compressedData,
                                        bitIndex);
            
            node = bit ? node.oneChild : node.zeroChild;
            
            ++bitIndex;
            ++previousCodeLength;
        }
        
        return node.value;
    }
    
    public long getPreviousCodeLength() {
        return previousCodeLength;
    }
    
    private static boolean readBit(final byte[] rawData, final long bitIndex) {
        final int byteIndex = (int) (bitIndex / Byte.SIZE);
        final byte targetByte = rawData[byteIndex];
        final byte mask = (byte)(1 << (bitIndex % Byte.SIZE));
        
        return ((mask & targetByte) != 0);
    }
    
    /**
     * Inserts the symbol/codeword pair into this tree.
     * 
     * @param symbol   the symbol to insert.
     * @param codeword the codeword associated with the input symbol.
     */
    private void insert(final Byte value, final CodeWord codeword) {
        TreeNode node = root;
        
        for (int i = codeword.length() - 1; i >= 0; --i) {
            final boolean bit = codeword.get(i);
            
            if (bit) { 
                // Bit is set to 1.
                if (node.oneChild == null) {
                    node.oneChild = new TreeNode();
                }
                
                node = node.oneChild;
            } else {
                // Bit is set to 0.
                if (node.zeroChild == null) {
                    node.zeroChild = new TreeNode();
                }
                
                node = node.zeroChild;
            }
        }
        
        if (node.value != null) {
            throw new IllegalStateException(
                    String.format(
                            "Dublicate codeword [%s]: " + 
                                "two symbols share the same code.", 
                            codeword.toString()));
        }
        
        node.value = value;
    }
}

package io.github.coderodde.compressor.app;

/**
 * This class provides a method for building instances of
 * {@link io.github.coderodde.compressor.app.ByteFrequencyDistribution} over byte-wise
 * data.
 * 
 * @author Rodion "rodde" Efremov
 * @version 1.0.0 (Nov 16, 2025)
 * @since 1.0.0 (Nov 16, 2025)
 */
public final class ByteWeightDistributionBuilder {
    
    private ByteWeightDistributionBuilder() {
        
    }
    
    /**
     * Builds and returns the weight distribution of the input raw data.
     * 
     * @param rawData the byte array holding the data to compress.
     * 
     * @return the weight distribution.
     */
    public static ByteFrequencyDistribution 
        buildByteWeightDistribution(final byte[] rawData) {
        
        final ByteFrequencyDistribution frequencyDistribution =
                new ByteFrequencyDistribution();
        
        for (final byte value : rawData) {
            frequencyDistribution.incrementFrequency(value);
        }
        
        return frequencyDistribution;
    }
}

package io.github.coderodde.compressor.app;

import java.util.BitSet;

/**
 * This class implements a <b>binary</b> code word in data compression 
 * scenarios.
 * 
 * @author Rodion "rodde" Efremov
 * @version 1.1.0 (Nov 13, 2025)
 * @since 1.0.0 (Oct 28, 2025)
 */
public class CodeWord {

    private int length;
    private final BitSet bits;
    
    public CodeWord(final int length) {
        checkLength(length);
        this.length = length;
        this.bits = new BitSet(length);
    }
    
    public CodeWord reverse() {
        final CodeWord reversed = new CodeWord(length);
        
        for (int i = length - 1, j = 0; i >= 0; --i, ++j) {
            if (get(i)) {
                reversed.set(j);
            }
        }
        
        return reversed;
    }
    
    public byte[] toByteArray() {
        return bits.toByteArray();
    }
    
    public void prependBit(final boolean bit) {
        if (bit) {
            bits.set(length);
        }
        
        ++length;
    }
    
    public int length() {
        return length;
    }
    
    public boolean get(final int index) {
        checkIndex(index);
        return bits.get(index);
    }
    
    public void set(final int index) {
        checkIndex(index);
        bits.set(index);
    }
    
    public void unset(final int index) {
        checkIndex(index);
        bits.set(index, false);
    }
    
    @Override
    public boolean equals(final Object object) {
        if (object == this) {
            return true;
        }
        
        if (object == null) {
            return false;
        }
    
        if (!getClass().equals(object.getClass())) {
            return false;
        }
        
        final CodeWord other = (CodeWord) object;
        
        if (length() != other.length()) {
            return false;
        }
        
        for (int i = 0; i < length(); ++i) {
            if (get(i) != other.get(i)) {
                return false;
            }
        }
        
        return true;
    }
    
    @Override
    public String toString() {
        final StringBuilder sb = new StringBuilder(length);
        
        for (int i = length - 1; i >= 0; --i) {
            sb.append(get(i) ? "1" : "0");
        }
        
        return sb.toString();
    }
    
    private void checkIndex(final int index) {
        if (index < 0) {
            throw new IndexOutOfBoundsException(
                    String.format("index(%d) < 0", index));
        }
        
        if (index >= this.length) {
            throw new IndexOutOfBoundsException(
                    String.format("index(%d) >= length(%d)", 
                                  index, 
                                  length));
        }
    }
    
    private static void checkLength(final int length) {
        if (length < 0) {
            throw new IllegalArgumentException(
                    String.format("length(%d) < 1", length));
        }
    }
}

package io.github.coderodde.compressor.app;

/**
 * This class contains configuration constants.
 * 
 * @author Rodion "rodde" Efremov
 * @version 1.0.0 (Nov 19, 2025)
 * @since 1.0.0 (Nov 19, 2025)
 */
final class Configuration {

    /**
     * This capacity stands for {@code 2^8 = 256}, the number of distinct byte 
     * values.
     */
    static final int CODE_TABLE_CAPACITY = 256;
    
    /**
     * Specifies how many bytes to use in order to communicate the size of the
     * Huffman code.
     */
    static final int BYTES_PER_CODE_SIZE = 4;
    
    /**
     * Specifies how many bytes to use in order to communicate the actual length
     * (in bytes) of the input byte array.
     */
    static final int BYTES_PER_RAW_DATA_LENGTH = 4;
    
    /**
     * Specifies how many bytes to reserve for describing the byte being 
     * encoded. 
     */
    static final int BYTES_PER_BYTE_DESCRIPTOR = 1;
    
    /**
     * Specifies how many bytes to reserve for signalling the codeword length.
     */
    static final int BYTES_PER_CODEWORD_LENGTH = 1;
    
    /**
     * Specifies how many bytes to use for the codeword.
     */
    static final int BYTES_PER_CODEWORD_MAX = 4;
    
    private Configuration() {
        
    }
}

package io.github.coderodde.compressor.app;

/**
 * The instances of this class are thrown when dealing with empty code tables.
 * 
 * @author Rodion "rodde" Efremov
 * @version 1.0.0 (Nov 16, 2025)
 * @since 1.0.0 (Nov 16, 2025)
 */
public final class EmptyCodeTableException extends RuntimeException {

    public EmptyCodeTableException() {
        super("The code table is empty");
    }
}

package io.github.coderodde.compressor.app;

import java.util.Objects;

/**
 * This class implements a method for compressing byte-wise files via Huffman-
 * coding.
 * 
 * @author Rodion "rodde" Efremov
 * @version 1.0.0 (Nov 14, 2025)
 * @since 1.0.0 (Nov 14, 2025)
 */
public final class HuffmanByteCompressor {
    
    /**
     * Compresses the {@code rawData} {@code byte}-array using the input
     * {@code weightDistribution}.
     * 
     * @param rawData            the raw data to compress.
     * 
     * @return the full binary {@code byte}-array containing all the data needed
     *         to decompress the compressed file.
     */
    public static byte[] compress(final byte[] rawData) {
            
        Objects.requireNonNull(rawData);
        
        if (rawData.length == 0) {
            throw new IllegalArgumentException("The input byte array is empty");
        }
        
        final ByteFrequencyDistribution byteWeightDistribution =
                ByteWeightDistributionBuilder
                        .buildByteWeightDistribution(rawData);
               
        final ByteHuffmanCodeTable codeTable = 
                ByteHuffmanCodeTableBuilder.buildCode(byteWeightDistribution);
        
        final int countNumberOfBytesInCodeHeader = 
                Utils.countBytesInCodeHeader(codeTable.size());
        
        final long countNumberOfBytesInRawData = 
                Utils.countBitsInRawData(codeTable, 
                                         rawData);
        
        final byte[] outputData = 
                new byte[(int)(countNumberOfBytesInCodeHeader + 
                               countNumberOfBytesInRawData)];
        
        final ByteArrayHeaderWriter headerWriter = 
                new ByteArrayHeaderWriter(rawData.length, 
                                          outputData,
                                          codeTable);
        
        headerWriter.write();
        
        final long startingDataBitIndex = headerWriter.getDataStartBitIndex();
        
        final ByteArrayCompressedDataWriter dataWriter = 
                new ByteArrayCompressedDataWriter(outputData,
                                                  rawData, 
                                                  startingDataBitIndex, 
                                                  codeTable);
        dataWriter.write();
        
        return outputData;
    }
}

package io.github.coderodde.compressor.app;

/**
 * This class implements a method for <b>decompressing</b> byte-wise files via 
 * Huffman-coding.
 * 
 * @author Rodion "rodde" Efremov
 * @version 1.0.0 (Nov 14, 2025)
 * @since 1.0.0 (Nov 14, 2025)
 */
public final class HuffmanByteDecompressor {

    public static byte[] decompress(final byte[] compressedData) {
        final ByteArrayHeaderReader headerReader = 
                new ByteArrayHeaderReader(compressedData);
        
        final int rawDataLength = headerReader.getRawDataLength();
        final byte[] rawData = new byte[rawDataLength];
        
        final ByteHuffmanCodeTable codeTable = headerReader.getCodeTable();
        final ByteHuffmanDecoderTree<Byte> decoder = 
                new ByteHuffmanDecoderTree<>(codeTable);
        
        final int startingBitIndex = 
                Utils.countBytesInCodeHeader(codeTable.size()) * Byte.SIZE;
        
        final ByteArrayCompressedDataReader dataReader = 
                new ByteArrayCompressedDataReader(rawData, 
                                                  compressedData, 
                                                  startingBitIndex,
                                                  decoder);
        
        dataReader.read();
        return rawData;
    }
}

package io.github.coderodde.compressor.app;

/**
 * This exception class implements an exception that is thrown if 
 * {@link io.github.coderodde.compressor.app.WeightDistribution} does not contain a
 * requested symbol.
 * 
 * @author Rodion "rodde" Efremov
 * @version 1.0.0 (Nov 13, 2025)
 * @since 1.0.0 (Nov 13, 2025)
 */
public class SymbolNotFoundException extends RuntimeException {

    public SymbolNotFoundException(final String exceptionMessage) {
        super(exceptionMessage);
    }
}

package io.github.coderodde.compressor.app;

/**
 * The instances of this class denote the situations where the compressed raw 
 * data is too short.
 * 
 * @author Rodion "rodde" Efremov
 * @version 1.0.0 (Nov 16, 2025)
 * @since 1.0.0 (Nov 16, 2025)
 */
public final class TooShortRawDataLengthException extends RuntimeException {

    public TooShortRawDataLengthException(final String exceptionMessage) {
        super(exceptionMessage);
    }
}

package io.github.coderodde.compressor.app;

import static io.github.coderodde.compressor.app.Configuration.BYTES_PER_BYTE_DESCRIPTOR;
import static io.github.coderodde.compressor.app.Configuration.BYTES_PER_CODEWORD_LENGTH;
import static io.github.coderodde.compressor.app.Configuration.BYTES_PER_CODEWORD_MAX;
import static io.github.coderodde.compressor.app.Configuration.BYTES_PER_CODE_SIZE;
import static io.github.coderodde.compressor.app.Configuration.BYTES_PER_RAW_DATA_LENGTH;

/**
 * This class contains some various helper methods.
 * 
 * @author Rodion "rodde" Efremov
 * @version 1.1.0 (Nov 21, 2025)
 * @since 1.0.0 (Nov 16, 2025)
 */
public final class Utils {

    private Utils() {
        
    }
    
    public static long countBitsInRawData(final ByteHuffmanCodeTable code,
                                          final byte[] rawData) {
        long bits = 0;
        
        for (final byte b : rawData) {
            bits += code.get(b).length();
        }
        
        return bits / Byte.SIZE + (bits % Byte.SIZE != 0 ? 1L : 0L);
    }
    
    public static int getCodeEntryLength() {
        return BYTES_PER_BYTE_DESCRIPTOR + 
               BYTES_PER_CODEWORD_LENGTH +
               BYTES_PER_CODEWORD_MAX;
    }
    
    public static int countBytesInCodeHeader(final int codeSize) {
        final int codeEntryLength = getCodeEntryLength();
        
        return (codeSize * codeEntryLength) + BYTES_PER_CODE_SIZE
                                            + BYTES_PER_RAW_DATA_LENGTH;
    }
}

package io.github.coderodde.compressor.app;

import java.util.Set;

/**
 * The instances of this class hold sets of symbols with the sum of their 
 * frequencies.
 * 
 * @author Rodion "rodde" Efremov
 * @version 1.0.0 (Nov 14, 2025)
 * @since 1.0.0 (Nov 14, 2025)
 */
final class WeightedByteSet implements Comparable<WeightedByteSet> {

    private final Set<Byte> set;
    private final long totalSetWeight;

    WeightedByteSet(final Set<Byte> set,
                      final long totalSetWeight) {
        this.set = set;
        this.totalSetWeight = totalSetWeight;
    }
    
    Set<Byte> getSet() {
        return set;
    }
    
    long getTotalWeight() {
        return totalSetWeight;
    }

    @Override
    public int compareTo(final WeightedByteSet o) {
        return Double.compare(totalSetWeight, o.totalSetWeight); // Should be Long.compare
    }
}

package io.github.coderodde.compressor.app;

import java.util.Arrays;
import org.junit.Test;
import static org.junit.Assert.*;

public class ByteArrayCompressedDataReaderTest {
    
    private static final int STRESS_TEST_ITERATIONS = 50;
    
    private static final byte[] COMPRESSED_DATA = new byte[10_000];
    
    @Test
    public void readSmallTest() {
        final byte[] rawData = { 45, 46, 47, 47, 46, 47  };
        
        final ByteFrequencyDistribution wd =
                ByteWeightDistributionBuilder
                        .buildByteWeightDistribution(rawData);
        
        final ByteHuffmanCodeTable codeTable = 
                ByteHuffmanCodeTableBuilder.buildCode(wd);
        
        final ByteArrayCompressedDataWriter writer = 
                new ByteArrayCompressedDataWriter(COMPRESSED_DATA, 
                                                  rawData,
                                                  0,
                                                  codeTable);
        writer.write();
        
        final ByteHuffmanDecoderTree<Byte> decoderTree = 
                new ByteHuffmanDecoderTree<>(codeTable);
        
        final byte[] resultRawData = new byte[rawData.length];
        
        final ByteArrayCompressedDataReader reader = 
                new ByteArrayCompressedDataReader(resultRawData, 
                                                  COMPRESSED_DATA, 
                                                  0,
                                                  decoderTree);
        reader.read();
        assertTrue(Arrays.equals(rawData, resultRawData));
    }
    
//    @Test
    public void readStressTest() {
        for (int i = 0; i < STRESS_TEST_ITERATIONS; ++i) {
            Arrays.fill(COMPRESSED_DATA, (byte) 0);
            
            final byte[] rawData = TestUtils.getRawData();
            
            // Write the compressed data:
            final ByteFrequencyDistribution wd = 
                    ByteWeightDistributionBuilder
                            .buildByteWeightDistribution(rawData);
            
            final ByteHuffmanCodeTable codeTable = 
                    ByteHuffmanCodeTableBuilder.buildCode(wd);
            
            final ByteArrayCompressedDataWriter writer = 
                    new ByteArrayCompressedDataWriter(COMPRESSED_DATA,
                                                      rawData, 
                                                      0,
                                                      codeTable);
            
            writer.write();
            
            // Read the source data:
            final ByteHuffmanDecoderTree<Byte> decoderTree = 
                    new ByteHuffmanDecoderTree<>(codeTable);
            
            final byte[] resultRawData = new byte[rawData.length];
            
            final ByteArrayCompressedDataReader reader = 
                    new ByteArrayCompressedDataReader(resultRawData, 
                                                      COMPRESSED_DATA, 
                                                      0, 
                                                      decoderTree);
            
            reader.read();
            
            assertTrue(Arrays.equals(rawData, resultRawData));
        }
    }
}

package io.github.coderodde.compressor.app;

import java.util.Arrays;
import org.junit.Test;
import static org.junit.Assert.*;
import org.junit.Before;

public final class ByteArrayHeaderReaderTest {   
    
    private static final int STRESS_TEST_ITERATIONS = 50;
    
    private static final byte[] COMPRESSED_DATA = new byte[10_000];
    
    @Before
    public void clearCompressedData() {
        Arrays.fill(COMPRESSED_DATA, (byte) 0);
    }
    
    @Test
    public void smallTest() {
        final byte[] rawData = { 88, 40, 48 };
        final ByteFrequencyDistribution wd = 
                ByteWeightDistributionBuilder
                        .buildByteWeightDistribution(rawData);
        
        final ByteHuffmanCodeTable expectedCodeTable = 
                ByteHuffmanCodeTableBuilder.buildCode(wd);
        
        final ByteArrayHeaderWriter writer = new ByteArrayHeaderWriter(rawData.length,
                                                             COMPRESSED_DATA,
                                                             expectedCodeTable);
        
        writer.write();
        
        final ByteArrayHeaderReader reader = 
                new ByteArrayHeaderReader(COMPRESSED_DATA);
        
        final int resultRawDataLength = reader.getRawDataLength();
        final ByteHuffmanCodeTable resultCodeTable = reader.getCodeTable();
        
        assertEquals(rawData.length, resultRawDataLength);
        assertEquals(expectedCodeTable, resultCodeTable);
    }
    
    @Test
    public void stressTest() {
        for (int i = 0; i < STRESS_TEST_ITERATIONS; ++i) {
            // Clear the output buffer in order to get rid of junk:
            Arrays.fill(COMPRESSED_DATA, (byte) 0);
            
            final byte[] rawData = TestUtils.getRawData();
            
            final ByteFrequencyDistribution weightDistribution =
                    ByteWeightDistributionBuilder
                            .buildByteWeightDistribution(rawData);
            
            final ByteHuffmanCodeTable expectedCodeTable = 
                    ByteHuffmanCodeTableBuilder.buildCode(weightDistribution);
            
            final ByteArrayHeaderWriter writer = 
                    new ByteArrayHeaderWriter(rawData.length,
                                         COMPRESSED_DATA, 
                                         expectedCodeTable);
            
            writer.write();
            
            final ByteArrayHeaderReader reader = 
                    new ByteArrayHeaderReader(COMPRESSED_DATA);
            
            final int rawDataLength = reader.getRawDataLength();
            final ByteHuffmanCodeTable readCodeTable = reader.getCodeTable();
            
            assertEquals(rawData.length, rawDataLength);
            assertEquals(expectedCodeTable, readCodeTable);
        }
    }
}

package io.github.coderodde.compressor.app;

import java.util.Arrays;
import static org.junit.Assert.assertTrue;
import org.junit.Test;

public class HuffmanByteDecompressorTest {

    private static final int STRESS_TEST_ITERATIONS = 50;
    
    @Test
    public void smallTest() {
        final byte[] rawData = { 45, 46, 47, 47, 46, 47 };
        final byte[] compressedRawData =
                HuffmanByteCompressor.compress(rawData);
        
        final byte[] resultData = 
                HuffmanByteDecompressor.decompress(compressedRawData);
        
        assertTrue(Arrays.equals(rawData, resultData));
    }
    
    @Test
    public void decompressStressTest() {
        for (int i = 0; i < STRESS_TEST_ITERATIONS; ++i) {
            stressTest();
        }
    }
    
    private void stressTest() {
        final byte[] sourceData = TestUtils.getRawData();
        final byte[] compressedData = 
                HuffmanByteCompressor.compress(sourceData);
        
        final byte[] targetData = 
                HuffmanByteDecompressor.decompress(compressedData);
        
        assertTrue(Arrays.equals(sourceData, targetData));
    }
}

package io.github.coderodde.compressor.app;

import java.util.Arrays;
import org.junit.Test;
import static org.junit.Assert.*;

public class HuffmanDecodingTreeTest {
    
    @Test
    public void constructAndDecode() {
        final byte[] sourceData = { 45, 46, 47, 47, 46, 47 };
        final byte[] targetData = new byte[10];
        
        final ByteFrequencyDistribution wd = 
                ByteWeightDistributionBuilder
                        .buildByteWeightDistribution(sourceData);
        
        final ByteHuffmanCodeTable codeTable = 
                ByteHuffmanCodeTableBuilder.buildCode(wd);
        
        final ByteArrayCompressedDataWriter writer = 
                new ByteArrayCompressedDataWriter(targetData,
                                                  sourceData,
                                                  0,
                                                  codeTable);
        
        writer.write();
        
        final ByteHuffmanDecoderTree<Byte> decoderTree = 
                new ByteHuffmanDecoderTree<>(codeTable);
        
        final byte[] decompressedData = new byte[sourceData.length];
        
        final ByteArrayCompressedDataReader reader = 
                new ByteArrayCompressedDataReader(decompressedData, 
                                                  targetData, 
                                                  0,
                                                  decoderTree);
        
        reader.read();
        
        assertTrue(Arrays.equals(sourceData, decompressedData));
    }
}

package io.github.coderodde.compressor.app;

import java.util.Random;

public class TestUtils {
    
    private static final int SEED = 13;
    private static final int MAXIMUM_BYTE_ARRAY_LENGTH = 2_000;
    private static final Random RANDOM = new Random(SEED);
    
    private TestUtils() {
        
    }
    
    public static final byte[] getRawData() {
        final int length = 1 + RANDOM.nextInt(MAXIMUM_BYTE_ARRAY_LENGTH);
        final byte[] rawData = new byte[length];
        RANDOM.nextBytes(rawData);
        return rawData;
    }
}

Typical output

C:\Users\rodio\Documents>java -jar HuffmanCompressorApp.java-1.1.0.jar WarAndPeace.html
[INFO] Read all file bytes in 15 milliseconds.
[INFO] Compressed the data in 412 milliseconds.
[INFO] Written the compressed data in 19 milliseconds.

C:\Users\rodio\Documents>java -jar HuffmanCompressorApp.java-1.1.0.jar WarAndPeace.html.huf uncompressed.WarAndPeace.html
[INFO] Read the compressed data in 9 milliseconds.
[INFO] Decompressed the data in 137 milliseconds.
[INFO] Written the decompressed data in 24 milliseconds.

C:\Users\rodio\Documents>fc /b WarAndPeace.html uncompressed.WarAndPeace.html
Comparing files WarAndPeace.html and UNCOMPRESSED.WARANDPEACE.HTML
FC: no differences encountered

Critique request

Please, tell me anything that comes to mind.

\$\endgroup\$
0

1 Answer 1

2
\$\begingroup\$

random findings:

  • final classes - what is the advantage?

  • repeated code (contributes to an overall impression of verbose)

    • "exitIfFileMissing()" (or couldn't be "open"ed with wanted permission/locking?)
    • "logMilliSeconds()" (easier with aspect support or macro substitution?)
  • countBitsInRawData() does count bits. It returns something different:
    either it should be named accordingly (cf. countBytesInCodeHeader()), or the discrepancy be documented prominently.

  • raw data

    - what's that? Used for original data as well as for encoded data.
    I suggest original, encoded, decoded. (Compressed would be wishful thinking.)

  • data types for originalDataLength and code words
    There are files bigger than Integer.MAX_VALUE - File.length()returns long.
    What size alphabet, what file length call for even one code longer than Long.SIZE bits? (cf. BYTES_PER_CODEWORD_MAX)(Long.SIZE+1 for w.c. tree structure, with probabilities from histogram on the order of 50 TB (can't seem to wrap my head around it))
    Then, there is Length-limited Huffman coding.

  • bit index into encoded data should be managed by (Byte)HuffmanDecoder(Tree) rather than (ByteArray)CompressedDataReader

  • "use" of nio.ByteBuffer
    Instantiation for a single put()/get() paired with a conversion to/from byte[] is abuse.
    Use file mapping.

  • reverse()ing while writing, but not while reading deserves a comment.

  • version comments might include since, last modified, as of.

  • "builder"-class pairs ByteHuffmanCodeTable&ByteFrequencyDistribution
    A finger exercise in creative patterns? I think Java factories & builders should provide instances implementing an interface, not of (known/specified) classes.
    I feel neither ByteHuffmanCodeTable nor ByteFrequencyDistribution is complex enough to justify more than a factory method. (Depending on design/program pattern terminology, a builder would be something being used in multiple steps to finish the wanted instance.)


(random?) advice

  • in encoded files/long-lived data include a header "unambigously" identifying algorithm and parameters (such as endianness, class Configuration members).
    (Arrange for "file" length = header length + original length to imply "encoding" is identity.)
  • data rate pivots on putting more than one thread to task on bulk data. One conceptually trivial approach to benefit from multithreading is to use parallel streams. Semi-obvious in "static" encoding.
    Try to asses difficulties in decoding with multiple threads; investigate existing approaches.
\$\endgroup\$
1
  • \$\begingroup\$ (Something feels off with having class ByteHuffmanCodeTableBuilder in addition to ByteHuffmanCodeTable, ByteWeightDistributionBuilder in addition to ByteFrequencyDistribution, or ByteArrayCompressedDataReader.read() asking a ByteHuffmanDecoderTree instance for the previous code length just to compute the next starting bit index to pass into decode() of that very instance. Wish I could suggest clear and consistent improvements.) \$\endgroup\$ Commented Nov 24 at 17:58

You must log in to answer this question.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.