001/* 002 * SPDX-License-Identifier: Apache-2.0 003 * 004 * Copyright 2024-2026 The Enola <https://enola.dev> Authors 005 * 006 * Licensed under the Apache License, Version 2.0 (the "License"); 007 * you may not use this file except in compliance with the License. 008 * You may obtain a copy of the License at 009 * 010 * https://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package dev.enola.common.io.mediatype; 019 020import com.google.common.io.ByteSource; 021import com.google.common.net.MediaType; 022 023import dev.enola.common.io.resource.AbstractResource; 024import dev.enola.common.io.resource.Resource; 025 026import java.net.URI; 027import java.nio.charset.Charset; 028import java.util.Optional; 029 030/** 031 * Detects the Charset of a Resource. 032 * 033 * <p>This interface is typically not used directly by {@link Resource} API users (who would just 034 * use {@link MediaType#charset()} on an {@link AbstractResource#mediaType()}). Instead, it is 035 * normally implemented by (some) <code>*MediaType</code> API implementations, for Charset detection 036 * that is specific to a given MediaType (if any). For example, RFC 4627 §3 specifies how to 037 * determine the encoding of JSON, or https://yaml.org/spec §5.2. specifies ditto for YAML. 038 */ 039public interface ResourceCharsetDetector { 040 041 /** 042 * Detect the {@link Charset} by "sniffing" the source e.g. for "<a 043 * href="https://en.wikipedia.org/wiki/Byte_order_mark">BOM detection</a>". The URI argument is 044 * only uses for error messages. (It's never "accessed".) 045 */ 046 Optional<Charset> detectCharset(URI uri, ByteSource source); 047}