001/*
002 * SPDX-License-Identifier: Apache-2.0
003 *
004 * Copyright 2023-2026 The Enola <https://enola.dev> Authors
005 *
006 * Licensed under the Apache License, Version 2.0 (the "License");
007 * you may not use this file except in compliance with the License.
008 * You may obtain a copy of the License at
009 *
010 *     https://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package dev.enola.common.io.iri;
019
020import static java.util.Collections.emptyMap;
021
022import com.google.common.base.Splitter;
023import com.google.common.base.Strings;
024import com.google.common.io.Files;
025import com.google.common.net.MediaType;
026
027import dev.enola.common.context.Context;
028import dev.enola.common.context.TLC;
029
030import org.jspecify.annotations.Nullable;
031
032import java.io.IOException;
033import java.net.URI;
034import java.net.URISyntaxException;
035import java.net.URLEncoder;
036import java.nio.charset.Charset;
037import java.nio.charset.StandardCharsets;
038import java.nio.file.FileSystems;
039import java.nio.file.Path;
040import java.util.HashMap;
041import java.util.Map;
042import java.util.function.Supplier;
043
044public final class URIs {
045    // see also class dev.enola.common.io.iri.IRIs
046
047    // TODO Eventually completely replace all this with the intended new IRI class!
048
049    // TODO Review if all this String instead of URI-based processing could be removed and replaced
050    // perhaps by fully adopting https://github.com/enola-dev/enola/issues/797; and/or alternatively
051    // with first encoding invalid special characters in URIs; see the related TBD in
052    // FileGlobResolver.
053
    // URI Query Parameter Names
    // MEDIA_TYPE is looked up both as-is and lower-cased by getMediaTypeAndCharset(URI).
    private static final String MEDIA_TYPE = "mediaType";
    private static final String CHARSET = "charset"; // as in MediaType#CHARSET_ATTRIBUTE

    // Splits a query string on '&'; every piece is trimmed and empty pieces are skipped.
    private static final Splitter AMPERSAND_SPLITTER =
            Splitter.on('&').omitEmptyStrings().trimResults();
060
    /**
     * Pair of the (unparsed) media type and charset texts, as returned by {@link
     * #getMediaTypeAndCharset(URI)}; either component, or both, may be {@code null}.
     */
    public record MediaTypeAndOrCharset(@Nullable String mediaType, @Nullable String charset) {}
062
063    public static boolean hasQueryParameter(URI uri, String key) {
064        var queryMap = getQueryMap(uri);
065        return queryMap.containsKey(key);
066    }
067
068    public static MediaTypeAndOrCharset getMediaTypeAndCharset(URI uri) {
069        var queryMap = getQueryMap(uri);
070        var charsetParameter = queryMap.get(CHARSET);
071        var mediaTypeParameter = queryMap.get(MEDIA_TYPE.toLowerCase());
072        if (mediaTypeParameter == null) mediaTypeParameter = queryMap.get(MEDIA_TYPE);
073        return new MediaTypeAndOrCharset(mediaTypeParameter, charsetParameter);
074    }
075
076    public static @Nullable String getCharset(URI uri) {
077        var mediaTypeAndCharset = getMediaTypeAndCharset(uri);
078        if (mediaTypeAndCharset.charset != null) return mediaTypeAndCharset.charset;
079        if (mediaTypeAndCharset.mediaType != null) {
080            var optMediaTypeCharset = MediaType.parse(mediaTypeAndCharset.mediaType);
081            if (optMediaTypeCharset.charset().isPresent())
082                return optMediaTypeCharset.charset().get().toString();
083        }
084        return null;
085    }
086
087    public static URI addMediaType(URI uri, MediaType mediaType) {
088        return addQueryParameter(uri, MEDIA_TYPE, mediaType.toString().replace(" ", ""));
089    }
090
091    public static URI addCharset(URI uri, Charset charset) {
092        return addQueryParameter(uri, CHARSET, charset.toString());
093    }
094
095    public static URI addQuery(URI uri, Map<String, String> parameters) {
096        for (var parameter : parameters.entrySet()) {
097            uri = addQueryParameter(uri, parameter.getKey(), parameter.getValue());
098        }
099        return uri;
100    }
101
102    /**
103     * Returns an URI with everything except the query parameters of the uri (1st) parameter, but
104     * the query parameters of originalUriWithQuery (2nd) parameter - IFF the uri (1st) parameter
105     * has no query; otherwise just returns the uri (1st) parameter as-is.
106     *
107     * <p>For example, this method can be used to preserve query parameters when changing the URI
108     * path.
109     */
110    public static URI addQuery(URI uri, URI originalUriWithQuery) {
111        if (Strings.isNullOrEmpty(originalUriWithQuery.getQuery())) return uri;
112        if (!Strings.isNullOrEmpty(uri.getQuery())) return uri;
113
114        return java.net.URI.create(uri + "?" + originalUriWithQuery.getQuery());
115    }
116
117    public static String addQuery(String string, Map<String, String> parameters) {
118        return addQuery(java.net.URI.create(string), parameters).toString();
119    }
120
121    private static URI addQueryParameter(URI uri, String key, String value) {
122        String connector;
123        if (uri.getQuery() != null && uri.getQuery().contains(key)) {
124            return uri;
125        } else if (uri.getQuery() == null && !uri.getSchemeSpecificPart().contains("?")) {
126            connector = "?";
127        } else if (uri.getQuery() == null && uri.getSchemeSpecificPart().equals("?")) {
128            // Handle special (but technically invalid cases) of "scheme:?" URIs with "empty" query
129            // This is currently used by EmptyResource.EMPTY_URI (but may be changed later)
130            connector = "";
131        } else {
132            connector = "&";
133        }
134        return java.net.URI.create(uri + connector + key + "=" + encodeQueryParameterValue(value));
135    }
136
137    private static String encodeQueryParameterValue(String value) {
138        return URLEncoder.encode(value, StandardCharsets.UTF_8).replace("+", "%20");
139    }
140
141    // package-local not public (for now)
142    static String getQueryString(String uri) {
143        var qp = uri.indexOf('?');
144        if (qp > -1) {
145            // Handle Glob URIs, like e.g. "file:/tmp//?.txt"
146            if (uri.indexOf('=', qp) == -1) return "";
147            var fp = uri.indexOf('#');
148            if (fp == -1) {
149                return uri.substring(qp + 1);
150            } else {
151                return uri.substring(qp + 1, fp);
152            }
153        }
154        return "";
155    }
156
157    public static Map<String, String> getQueryMap(String uri) {
158        return getQueryMapGivenQueryString(getQueryString(uri));
159    }
160
161    private static Map<String, String> getQueryMapGivenQueryString(String query) {
162        if (Strings.isNullOrEmpty(query)) {
163            return emptyMap();
164        }
165        Map<String, String> map = new HashMap<>();
166        AMPERSAND_SPLITTER.split(query).forEach(queryParameter -> put(queryParameter, map));
167        return map;
168    }
169
170    public static Map<String, String> getQueryMap(URI uri) {
171        if (uri == null) return emptyMap();
172
173        String query = uri.getQuery();
174        if (Strings.isNullOrEmpty(query)) {
175            var part = uri.getSchemeSpecificPart();
176            try {
177                query = getQueryString(part);
178            } catch (StringIndexOutOfBoundsException e) {
179                throw new IllegalStateException(uri.toString(), e);
180            }
181        }
182        return getQueryMapGivenQueryString(query);
183    }
184
185    private static void put(String queryParameter, Map<String, String> map) {
186        var p = queryParameter.indexOf('=');
187        if (p == -1) {
188            map.put(queryParameter, null);
189        } else {
190            var key = queryParameter.substring(0, p);
191            if (map.containsKey(key))
192                throw new IllegalArgumentException(
193                        "URI Query Parameter has duplicate key: " + queryParameter);
194            var value = queryParameter.substring(p + 1);
195            map.put(key, value);
196        }
197    }
198
199    /**
200     * Get a {@link Path} from an {@link URI}. This method is used internally by {@code
201     * dev.enola.common.io.resource.Resource} framework implementations, and typically shouldn't be
202     * called directly by users. Please see the {@code dev.enola.common.io.resource.FileResource}
203     * for more related background.
204     */
205    public static Path getFilePath(URI uri) {
206        uri = absolutify(uri);
207        uri = dropQueryAndFragment(uri);
208        try {
209            return Path.of(uri);
210        } catch (IllegalArgumentException e) {
211            throw new IllegalArgumentException("Invalid URI: " + uri, e);
212        }
213    }
214
215    public static Path getFilePath(String uri) {
216        var scheme = getScheme(uri);
217        var authority = ""; // TODO Implement getAuthority(String uri)
218        var path = getPath(uri);
219        return getFilePath(scheme, "", path);
220    }
221
222    private static Path getFilePath(String scheme, String authority, String path) {
223        // TODO Don't hard-code this to file: but use MoreFileSystems.URI_SCHEMAS, somehow...
224        if ("file".equals(scheme)) {
225            return FileSystems.getDefault().getPath(path);
226        } else
227            try {
228                URI fsURI = new URI(scheme, authority, null, null, null);
229                var fs = FileSystems.getFileSystem(fsURI);
230                return fs.getPath(path);
231            } catch (URISyntaxException e) {
232                // This is rather unexpected...
233                throw new IllegalStateException(
234                        "Failed to create FileSystem Authority URI: " + scheme + ":" + path, e);
235            }
236    }
237
238    public static String getScheme(String iri) {
239        if (iri == null) return "";
240        var p = iri.indexOf(':');
241        if (p == -1) return "";
242        return iri.substring(0, p);
243    }
244
245    public static boolean hasScheme(String iri) {
246        return iri.indexOf(':') > 0;
247    }
248
249    private static boolean hasScheme(URI uri) {
250        return !Strings.isNullOrEmpty(uri.getScheme());
251    }
252
253    static String getSchemeSpecificPart(String iri) {
254        if (iri == null) return "";
255        var p = iri.indexOf(':');
256        if (p == -1) return "";
257        return iri.substring(p + 1);
258    }
259
260    /**
261     * Get the "path"-like component of any URI. Similar to {@link URI#getPath()}, but also works
262     * e.g. for "non-standard" relative "file:hello.txt" URIs, and correctly chops off query
263     * arguments and fragments.
264     *
265     * <p>TODO This does not yet decode correctly! :=((
266     *
267     * <p>TODO Is this really required?! Re-review which tests URI#getPath() fails, and why...
268     */
269    @Deprecated // Get rid of this, it's stupid, buggy, and bad.
270    // TODO This doesn't correctly handle URIs with an authority!
271    public static String getPath(URI uri) {
272        return chopFragmentAndQuery(uri.getSchemeSpecificPart());
273    }
274
275    public static String getPath(String uri) {
276        return chopFragmentAndQuery(getSchemeSpecificPart(uri));
277    }
278
279    private static String chopFragmentAndQuery(String ssp) {
280        var chop = ssp.indexOf('?');
281
282        // Handle Glob URIs, like e.g. "file:/tmp//?.txt"
283        if (ssp.indexOf('=', chop) == -1) chop = -1;
284
285        if (chop == -1) chop = ssp.indexOf('#');
286        if (chop == -1) chop = ssp.length();
287
288        return ssp.substring(0, chop);
289    }
290
291    private static String chopLastSlash(String path) {
292        if (!path.endsWith("/")) return path;
293        else return path.substring(0, path.length() - 1);
294    }
295
296    /**
297     * Extracts the "file name" from an URI, or the empty string if there is none. The filename is
298     * simply the last part of the path of the URI. It COULD be a directory! Works for file: http:
299     * and other even for "weird" URIs, such as those from Classpath URLs.
300     *
301     * <p>See also {@link com.google.common.io.Files#getFileExtension(String)} and @{@link
302     * com.google.common.io.Files#getNameWithoutExtension(String)}.
303     */
304    public static String getFilename(URI uri) {
305        return getLastPathSegmentOrHost(uri, false);
306    }
307
308    public static String getFilenameWithoutExtension(URI uri, String... extensions) {
309        var fileName = getFilename(uri);
310        for (String extension : extensions) {
311            if (fileName.endsWith(extension))
312                return fileName.substring(0, fileName.length() - extension.length());
313        }
314        return Files.getNameWithoutExtension(fileName);
315    }
316
317    public static boolean hasExtension(URI uri, String... extensions) {
318        var path = uri.getPath();
319        if (path == null) return false;
320        for (String extension : extensions) if (path.endsWith(extension)) return true;
321        return false;
322    }
323
324    public static String getFilenameOrLastPathSegmentOrHost(URI uri) {
325        return getLastPathSegmentOrHost(uri, true);
326    }
327
328    private static String getLastPathSegmentOrHost(URI uri, boolean evenIfSlashed) {
329        String path;
330        var scheme = uri.getScheme();
331        if ("file".equals(scheme)) {
332            path = chopFragmentAndQuery(uri.getPath());
333        } else if ("jar".equals(scheme)) {
334            return chopFragmentAndQuery(
335                    getFilename(java.net.URI.create(uri.getSchemeSpecificPart())));
336        } else if ("http".equals(scheme) || "https".equals(scheme)) {
337            path = chopFragmentAndQuery(uri.getPath());
338        } else {
339            path = getPath(uri);
340        }
341
342        if (!evenIfSlashed && (Strings.isNullOrEmpty(path) || path.endsWith("/"))) {
343            return "";
344        }
345        if (evenIfSlashed && (Strings.isNullOrEmpty(path) || "/".equals(path))) {
346            var host = uri.getHost();
347            return host;
348        }
349
350        int p;
351        if (!path.endsWith("/")) p = path.lastIndexOf('/');
352        else p = path.substring(0, path.length() - 1).lastIndexOf('/');
353
354        return p > -1 ? chopLastSlash(path.substring(p + 1)) : chopLastSlash(path);
355    }
356
357    /**
358     * This converts e.g. "file:relative.txt" to "file:///tmp/.../relative.txt" (depending on the
359     * current working directory, obviously). It loses any query parameters and fragments.
360     */
361    @Deprecated // TODO Get rid of the support for the fake "file:relative.txt" syntax
362    public static URI rel2abs(URI uri) {
363        if (uri == null) return uri;
364        if (!"file".equals(uri.getScheme())) return uri;
365        if (!uri.isOpaque()) return uri;
366
367        String relativePath = uri.getSchemeSpecificPart();
368        return Path.of(relativePath).toAbsolutePath().toUri();
369    }
370
371    public static URI absolutify(URI uri) {
372        if (hasScheme(uri)) return uri;
373        var base = TLC.optional(ContextKeys.BASE).orElseThrow(ex(uri));
374        return base.resolve(uri);
375    }
376
377    public static String absolutify(String uri) {
378        if (hasScheme(uri)) return uri;
379        var base = TLC.optional(ContextKeys.BASE).orElseThrow(ex(uri));
380        return resolve(base.toString(), uri);
381    }
382
383    private static String resolve(String base, String child) {
384        if (!child.startsWith("/")) return base + child;
385        return getScheme(base) + "://" + child;
386    }
387
388    private static Supplier<IllegalStateException> ex(Object uri) {
389        return () ->
390                new IllegalStateException("Missing ContextKeys.BASE on TLC to resolve: " + uri);
391    }
392
393    public static URI dropQueryAndFragment(URI uri) {
394        if (uri.getRawQuery() == null && uri.getRawFragment() == null) return uri;
395        try {
396            return new URI(
397                    uri.getScheme(),
398                    uri.getUserInfo(),
399                    uri.getHost(),
400                    uri.getPort(),
401                    uri.getPath(),
402                    null,
403                    null);
404        } catch (URISyntaxException e) {
405            throw new IllegalArgumentException("TODO FIXME" + uri, e);
406        }
407    }
408
409    public static String dropQueryAndFragment(String uri) {
410        return dropQueryAndFragment(URI.create(uri)).toString();
411    }
412
413    public static URI getBase(URI uri) throws IOException {
414        try {
415            var path = uri.getPath();
416            if (path == null)
417                // throw new IllegalArgumentException("Cannot get Path from: " + uri);
418                return URI.create("");
419            if (path.endsWith("/"))
420                if (uri.getQuery() == null && uri.getFragment() == null) return uri;
421                // NB: Both query and fragment are *intentionally* dropped in this case!
422                else return new URI(uri.getScheme(), uri.getAuthority(), path, null);
423
424            path = path.substring(0, path.lastIndexOf('/'));
425            // Again, same as above, we're ignoring query & fragment
426            return new URI(uri.getScheme(), uri.getAuthority(), path, null);
427        } catch (URISyntaxException e) {
428            throw new IOException("TODO Why is this new URI invalid: " + uri, e);
429        }
430    }
431
432    public static URI addFragment(URI uri, String fragment) {
433        if (Strings.isNullOrEmpty(fragment)) return uri;
434        try {
435            return new URI(
436                    uri.getScheme(),
437                    uri.getAuthority(),
438                    uri.getPath(),
439                    uri.getQuery(),
440                    (uri.getFragment() != null ? uri.getFragment() : "") + fragment);
441        } catch (URISyntaxException e) {
442            throw new IllegalArgumentException("Failed to addFragment", e);
443        }
444    }
445
446    // NB: There is intentionally no URI create(String uri) method here!
447    // Do simply use {@link URI#create(String)} or {@link URI#URI(String)}.
448    // If you find that it does not correctly set the URI's Query, just use
449    // "scheme:/thing?ping=pong=pang#fragment" instead of "scheme:thing?ping=pong=pang#fragment".
450    /*
451        public static URI create(String uri) {
452            String scheme = getScheme(uri);
453            String authority = null;
454            String path = getPath(uri);
455            String query = getQueryString(uri);
456            String fragment = getFragment(uri);
457            try {
458                // This fails if path doesn't start with '/' so this entire method is pointless!
459                return new URI(scheme, authority, path, query, fragment);
460            } catch (URISyntaxException e) {
461                throw new IllegalArgumentException("Invalid Syntax: " + uri, e);
462            }
463        }
464
465        private static String getFragment(String uri) {
466            return uri.lastIndexOf('#') == -1 ? uri : uri.substring(uri.lastIndexOf('#') + 1);
467        }
468    */
469
470    // TODO Review if getScheme(), getPath(), getQueryString(), getFragment() are *REALLY* needed?!
471
472    @Deprecated // TODO This automagic " " => %20 is fishy... who needs this, why?!
473    public static String encode(String uri) {
474        return uri.replace(" ", "%20");
475    }
476
477    public static URI parse(String uri) throws URISyntaxException {
478        return new URI(encode(uri));
479    }
480
    // Not instantiable; this class only has static members.
    private URIs() {}
482
    /** Keys of the {@link URI} valued {@link Context} entries which this class reads. */
    public enum ContextKeys implements Context.Key<URI> {

        /**
         * Base URI. Used to resolve <a
         * href="https://en.wikipedia.org/wiki/Uniform_Resource_Identifier#URI_references">relative
         * reference URIs</a> which don't have a scheme.
         */
        // TODO Support for this is new and emerging, and not yet fully comprehensive...
        BASE
    }
493}