001/*
002 * SPDX-License-Identifier: Apache-2.0
003 *
004 * Copyright 2024-2026 The Enola <https://enola.dev> Authors
005 *
006 * Licensed under the Apache License, Version 2.0 (the "License");
007 * you may not use this file except in compliance with the License.
008 * You may obtain a copy of the License at
009 *
010 *     https://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package dev.enola.common.io.mediatype;
019
020import com.google.common.io.ByteSource;
021import com.google.common.net.MediaType;
022
023import dev.enola.common.io.resource.AbstractResource;
024import dev.enola.common.io.resource.Resource;
025
026import java.net.URI;
027import java.nio.charset.Charset;
028import java.util.Optional;
029
030/**
031 * Detects the Charset of a Resource.
032 *
033 * <p>This interface is typically not used directly by {@link Resource} API users (who would just
034 * use {@link MediaType#charset()} on an {@link AbstractResource#mediaType()}). Instead, it is
035 * normally implemented by (some) <code>*MediaType</code> API implementations, for Charset detection
036 * that is specific to a given MediaType (if any). For example, RFC 4627 §3 specifies how to
037 * determine the encoding of JSON, or https://yaml.org/spec §5.2. specifies ditto for YAML.
038 */
039public interface ResourceCharsetDetector {
040
041    /**
042     * Detect the {@link Charset} by "sniffing" the source e.g. for "<a
043     * href="https://en.wikipedia.org/wiki/Byte_order_mark">BOM detection</a>". The URI argument is
044     * only uses for error messages. (It's never "accessed".)
045     */
046    Optional<Charset> detectCharset(URI uri, ByteSource source);
047}