001/* 002 * SPDX-License-Identifier: Apache-2.0 003 * 004 * Copyright 2023-2026 The Enola <https://enola.dev> Authors 005 * 006 * Licensed under the Apache License, Version 2.0 (the "License"); 007 * you may not use this file except in compliance with the License. 008 * You may obtain a copy of the License at 009 * 010 * https://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package dev.enola.common; 019 020import com.google.errorprone.annotations.Immutable; 021 022import java.nio.ByteBuffer; 023import java.nio.charset.Charset; 024import java.nio.charset.StandardCharsets; 025import java.security.SecureRandom; 026import java.util.Arrays; 027import java.util.UUID; 028 029/** 030 * Immutable Sequence of Bytes, of variable (but obviously fixed) length. 031 * 032 * <p>Typically intended to be used for "small"(-ish) size, like binary IDs, hashes, cryptographic 033 * keys, and such things; do not use this for "very large BLOBs". The hashCode is cached. 034 * 035 * <p>The <code>com.google.protobuf.ByteString</code> is very similar - but we don't want to depend 036 * on the ProtoBuf library JUST for having a type like this. Likewise, <code> 037 * com.google.crypto.tink.util.Bytes</code> is similar. 038 * 039 * <p>This intentionally does <b>not</b> implement <code>Iterable<Byte></code> to avoid boxing 040 * overhead. 041 */ 042@Immutable 043public final class ByteSeq implements Comparable<ByteSeq> { 044 045 // TODO Re-think if this is really the same as a com.google.common.io.ByteSource?! 046 // If concluding that it's not, then update JavaDoc to explain why... 047 048 // TODO Should this extend com.google.common.io.ByteSource?! 049 050 // TODO Should this have a static from(ByteSource) method? 051 052 // TODO Add String toBase64() and static ID fromBase64(String data) 053 // using https://www.baeldung.com/java-base64-encode-and-decode 054 055 // TODO Support substring (slice?) and concatenation, like Protobuf ByteString? 056 057 // TODO Add a ByteBuffer asReadOnlyByteBuffer() method? 058 059 // TODO Add startsWith(ByteSeq) and endsWith(ByteSeq) methods? 060 061 // TODO Add asInputStream() method? 062 063 public static final ByteSeq EMPTY = new ByteSeq(new byte[0]); 064 065 public static final class Builder { 066 private final byte[] bytes; 067 068 // TODO private final int position = 0; 069 070 private Builder(int size) { 071 bytes = new byte[size]; 072 } 073 074 public ByteSeq build() { 075 // TODO if (position != bytes.length) throw new IllegalStateException(); 076 return new ByteSeq(bytes); 077 } 078 079 // TODO public Builder add(int integer) { 080 081 public Builder add(ByteBuffer bb) { 082 if (!bb.isReadOnly()) { 083 throw new IllegalArgumentException("ByteBuffer !isReadOnly()"); 084 } 085 // TODO Write to current position offset 086 // TODO Take length as an argument 087 // TODO Check for Overflow 088 bb.get(bytes); 089 return this; 090 } 091 } 092 093 public static Builder builder(int size) { 094 return new Builder(size); 095 } 096 097 // TODO public static final ByteSeq fromMultibase(String multibase) { 098 099 /** 100 * Create a ByteSeq from an array of bytes. Prefer using the Builder instead of this, to avoid 101 * the implementation have to copy the array. 102 * 103 * @param bytes Bytes (which will be copied) 104 * @return the ByteSeq 105 */ 106 public static ByteSeq from(byte[] bytes) { 107 if (bytes.length == 0) { 108 return EMPTY; 109 } 110 return new ByteSeq(Arrays.copyOf(bytes, bytes.length)); 111 } 112 113 /** 114 * New ByteSeq from a String, in UTF-8. Inverse of {@link #asString()}. More efficient than 115 * (avoids 2nd re-copy) using {@link #from(byte[])} on {@link String#getBytes()}, and safer 116 * because it avoids accidentally using the (non-fixed) platform default charset. 117 */ 118 public static ByteSeq from(String string) { 119 return from(string, StandardCharsets.UTF_8); 120 } 121 122 public static ByteSeq from(String string, Charset charset) { 123 return new ByteSeq(string.getBytes(charset)); 124 } 125 126 /* 127 * Create a ByteSeq from a Protocol Buffer "bytes" field. 128 * 129 * @param proto the ByteString 130 * @return the ByteSeq 131 public static ByteSeq from(ByteString proto) { 132 return ByteSeq.builder(proto.size()).add(proto.asReadOnlyByteBuffer()).build(); 133 } 134 */ 135 136 // ? public static ByteSeq toByteSeq(Message proto) { return 137 // ByteSeq.copyFrom(proto.toByteArray()); } 138 139 public static ByteSeq from(UUID uuid) { 140 var byteBuffer = ByteBuffer.allocate(16); 141 byteBuffer.putLong(uuid.getMostSignificantBits()); 142 byteBuffer.putLong(uuid.getLeastSignificantBits()); 143 byteBuffer.position(0); 144 145 var builder = builder(16); 146 builder.add(byteBuffer.asReadOnlyBuffer()); 147 return builder.build(); 148 } 149 150 // Stolen from java.util.UUID: 151 private static class Holder { 152 static final SecureRandom numberGenerator = new SecureRandom(); 153 } 154 155 public static ByteSeq random(int size) { 156 // As in java.util.UUID#randomUUID: 157 // (See also dev.enola.data.id.UUID_IRI constructor!) 158 SecureRandom ng = Holder.numberGenerator; 159 byte[] randomBytes = new byte[size]; 160 ng.nextBytes(randomBytes); 161 return new ByteSeq(randomBytes); 162 } 163 164 @SuppressWarnings("Immutable") // Holy promise never to change the bytes! 165 private final byte[] bytes; 166 167 @SuppressWarnings("Immutable") // Holy promise never to use only as cache! 168 private transient int hashCode; 169 170 private ByteSeq(byte[] bytes) { 171 this.bytes = bytes; 172 } 173 174 public byte[] toBytes() { 175 return Arrays.copyOf(bytes, bytes.length); 176 } 177 178 public int size() { 179 return bytes.length; 180 } 181 182 public byte get(int index) { 183 return bytes[index]; 184 } 185 186 // public String toString(Base base) { return Multibase.encode(Base.Base32, id.toByteArray()); } 187 /* 188 public ByteString toByteString() { 189 return ByteString.copyFrom(bytes); 190 } 191 */ 192 // TODO Rename toUUID() to asUUID() for consistency with asString()? 193 public UUID toUUID() { 194 if (bytes.length != 16) { 195 throw new IllegalStateException( 196 "toUUID() is currently only supported for length == 16, not: " + bytes.length); 197 } 198 ByteBuffer bb = ByteBuffer.wrap(bytes); 199 long high = bb.getLong(); 200 long low = bb.getLong(); 201 return new UUID(high, low); 202 } 203 204 /** Return String from bytes decoded as UTF-8. Inverse of {@link #from(String)}. */ 205 public String asString() { 206 return new String(bytes, StandardCharsets.UTF_8); 207 } 208 209 /** 210 * Returns {@code true} if the size is {@code 0}, {@code false} otherwise. 211 * 212 * @return true if this is zero bytes long 213 */ 214 public boolean isEmpty() { 215 return size() == 0; 216 } 217 218 /** 219 * Return debug information about this object. 220 * 221 * @see #asString() 222 */ 223 @Override 224 public String toString() { 225 return "ByteSeq@" + Integer.toHexString(hashCode()) + "; size=" + size(); 226 } 227 228 @Override 229 public int hashCode() { 230 if (hashCode == 0) { 231 hashCode = Arrays.hashCode(bytes); 232 } 233 return hashCode; 234 } 235 236 @Override 237 public boolean equals(Object obj) { 238 if (this == obj) { 239 return true; 240 } 241 if (obj == null || getClass() != obj.getClass()) { 242 return false; 243 } 244 var other = (ByteSeq) obj; 245 if (hashCode() != other.hashCode()) { 246 return false; 247 } 248 return Arrays.equals(bytes, other.bytes); 249 } 250 251 @Override 252 public int compareTo(ByteSeq other) { 253 return Arrays.compare(bytes, other.bytes); 254 } 255}