001/* 002 * SPDX-License-Identifier: Apache-2.0 003 * 004 * Copyright 2023-2026 The Enola <https://enola.dev> Authors 005 * 006 * Licensed under the Apache License, Version 2.0 (the "License"); 007 * you may not use this file except in compliance with the License. 008 * You may obtain a copy of the License at 009 * 010 * https://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package dev.enola.common.io.iri; 019 020import static java.util.Collections.emptyMap; 021 022import com.google.common.base.Splitter; 023import com.google.common.base.Strings; 024import com.google.common.io.Files; 025import com.google.common.net.MediaType; 026 027import dev.enola.common.context.Context; 028import dev.enola.common.context.TLC; 029 030import org.jspecify.annotations.Nullable; 031 032import java.io.IOException; 033import java.net.URI; 034import java.net.URISyntaxException; 035import java.net.URLEncoder; 036import java.nio.charset.Charset; 037import java.nio.charset.StandardCharsets; 038import java.nio.file.FileSystems; 039import java.nio.file.Path; 040import java.util.HashMap; 041import java.util.Map; 042import java.util.function.Supplier; 043 044public final class URIs { 045 // see also class dev.enola.common.io.iri.IRIs 046 047 // TODO Eventually completely replace all this with the intended new IRI class! 048 049 // TODO Review if all this String instead of URI-based processing could be removed and replaced 050 // perhaps by fully adopting https://github.com/enola-dev/enola/issues/797; and/or alternatively 051 // with first encoding invalid special characters in URIs; see the related TBD in 052 // FileGlobResolver. 053 054 // URI Query Parameter Names 055 private static final String MEDIA_TYPE = "mediaType"; 056 private static final String CHARSET = "charset"; // as in MediaType#CHARSET_ATTRIBUTE 057 058 private static final Splitter AMPERSAND_SPLITTER = 059 Splitter.on('&').omitEmptyStrings().trimResults(); 060 061 public record MediaTypeAndOrCharset(@Nullable String mediaType, @Nullable String charset) {} 062 063 public static boolean hasQueryParameter(URI uri, String key) { 064 var queryMap = getQueryMap(uri); 065 return queryMap.containsKey(key); 066 } 067 068 public static MediaTypeAndOrCharset getMediaTypeAndCharset(URI uri) { 069 var queryMap = getQueryMap(uri); 070 var charsetParameter = queryMap.get(CHARSET); 071 var mediaTypeParameter = queryMap.get(MEDIA_TYPE.toLowerCase()); 072 if (mediaTypeParameter == null) mediaTypeParameter = queryMap.get(MEDIA_TYPE); 073 return new MediaTypeAndOrCharset(mediaTypeParameter, charsetParameter); 074 } 075 076 public static @Nullable String getCharset(URI uri) { 077 var mediaTypeAndCharset = getMediaTypeAndCharset(uri); 078 if (mediaTypeAndCharset.charset != null) return mediaTypeAndCharset.charset; 079 if (mediaTypeAndCharset.mediaType != null) { 080 var optMediaTypeCharset = MediaType.parse(mediaTypeAndCharset.mediaType); 081 if (optMediaTypeCharset.charset().isPresent()) 082 return optMediaTypeCharset.charset().get().toString(); 083 } 084 return null; 085 } 086 087 public static URI addMediaType(URI uri, MediaType mediaType) { 088 return addQueryParameter(uri, MEDIA_TYPE, mediaType.toString().replace(" ", "")); 089 } 090 091 public static URI addCharset(URI uri, Charset charset) { 092 return addQueryParameter(uri, CHARSET, charset.toString()); 093 } 094 095 public static URI addQuery(URI uri, Map<String, String> parameters) { 096 for (var parameter : parameters.entrySet()) { 097 uri = addQueryParameter(uri, parameter.getKey(), parameter.getValue()); 098 } 099 return uri; 100 } 101 102 /** 103 * Returns an URI with everything except the query parameters of the uri (1st) parameter, but 104 * the query parameters of originalUriWithQuery (2nd) parameter - IFF the uri (1st) parameter 105 * has no query; otherwise just returns the uri (1st) parameter as-is. 106 * 107 * <p>For example, this method can be used to preserve query parameters when changing the URI 108 * path. 109 */ 110 public static URI addQuery(URI uri, URI originalUriWithQuery) { 111 if (Strings.isNullOrEmpty(originalUriWithQuery.getQuery())) return uri; 112 if (!Strings.isNullOrEmpty(uri.getQuery())) return uri; 113 114 return java.net.URI.create(uri + "?" + originalUriWithQuery.getQuery()); 115 } 116 117 public static String addQuery(String string, Map<String, String> parameters) { 118 return addQuery(java.net.URI.create(string), parameters).toString(); 119 } 120 121 private static URI addQueryParameter(URI uri, String key, String value) { 122 String connector; 123 if (uri.getQuery() != null && uri.getQuery().contains(key)) { 124 return uri; 125 } else if (uri.getQuery() == null && !uri.getSchemeSpecificPart().contains("?")) { 126 connector = "?"; 127 } else if (uri.getQuery() == null && uri.getSchemeSpecificPart().equals("?")) { 128 // Handle special (but technically invalid cases) of "scheme:?" URIs with "empty" query 129 // This is currently used by EmptyResource.EMPTY_URI (but may be changed later) 130 connector = ""; 131 } else { 132 connector = "&"; 133 } 134 return java.net.URI.create(uri + connector + key + "=" + encodeQueryParameterValue(value)); 135 } 136 137 private static String encodeQueryParameterValue(String value) { 138 return URLEncoder.encode(value, StandardCharsets.UTF_8).replace("+", "%20"); 139 } 140 141 // package-local not public (for now) 142 static String getQueryString(String uri) { 143 var qp = uri.indexOf('?'); 144 if (qp > -1) { 145 // Handle Glob URIs, like e.g. "file:/tmp//?.txt" 146 if (uri.indexOf('=', qp) == -1) return ""; 147 var fp = uri.indexOf('#'); 148 if (fp == -1) { 149 return uri.substring(qp + 1); 150 } else { 151 return uri.substring(qp + 1, fp); 152 } 153 } 154 return ""; 155 } 156 157 public static Map<String, String> getQueryMap(String uri) { 158 return getQueryMapGivenQueryString(getQueryString(uri)); 159 } 160 161 private static Map<String, String> getQueryMapGivenQueryString(String query) { 162 if (Strings.isNullOrEmpty(query)) { 163 return emptyMap(); 164 } 165 Map<String, String> map = new HashMap<>(); 166 AMPERSAND_SPLITTER.split(query).forEach(queryParameter -> put(queryParameter, map)); 167 return map; 168 } 169 170 public static Map<String, String> getQueryMap(URI uri) { 171 if (uri == null) return emptyMap(); 172 173 String query = uri.getQuery(); 174 if (Strings.isNullOrEmpty(query)) { 175 var part = uri.getSchemeSpecificPart(); 176 try { 177 query = getQueryString(part); 178 } catch (StringIndexOutOfBoundsException e) { 179 throw new IllegalStateException(uri.toString(), e); 180 } 181 } 182 return getQueryMapGivenQueryString(query); 183 } 184 185 private static void put(String queryParameter, Map<String, String> map) { 186 var p = queryParameter.indexOf('='); 187 if (p == -1) { 188 map.put(queryParameter, null); 189 } else { 190 var key = queryParameter.substring(0, p); 191 if (map.containsKey(key)) 192 throw new IllegalArgumentException( 193 "URI Query Parameter has duplicate key: " + queryParameter); 194 var value = queryParameter.substring(p + 1); 195 map.put(key, value); 196 } 197 } 198 199 /** 200 * Get a {@link Path} from an {@link URI}. This method is used internally by {@code 201 * dev.enola.common.io.resource.Resource} framework implementations, and typically shouldn't be 202 * called directly by users. Please see the {@code dev.enola.common.io.resource.FileResource} 203 * for more related background. 204 */ 205 public static Path getFilePath(URI uri) { 206 uri = absolutify(uri); 207 uri = dropQueryAndFragment(uri); 208 try { 209 return Path.of(uri); 210 } catch (IllegalArgumentException e) { 211 throw new IllegalArgumentException("Invalid URI: " + uri, e); 212 } 213 } 214 215 public static Path getFilePath(String uri) { 216 var scheme = getScheme(uri); 217 var authority = ""; // TODO Implement getAuthority(String uri) 218 var path = getPath(uri); 219 return getFilePath(scheme, "", path); 220 } 221 222 private static Path getFilePath(String scheme, String authority, String path) { 223 // TODO Don't hard-code this to file: but use MoreFileSystems.URI_SCHEMAS, somehow... 224 if ("file".equals(scheme)) { 225 return FileSystems.getDefault().getPath(path); 226 } else 227 try { 228 URI fsURI = new URI(scheme, authority, null, null, null); 229 var fs = FileSystems.getFileSystem(fsURI); 230 return fs.getPath(path); 231 } catch (URISyntaxException e) { 232 // This is rather unexpected... 233 throw new IllegalStateException( 234 "Failed to create FileSystem Authority URI: " + scheme + ":" + path, e); 235 } 236 } 237 238 public static String getScheme(String iri) { 239 if (iri == null) return ""; 240 var p = iri.indexOf(':'); 241 if (p == -1) return ""; 242 return iri.substring(0, p); 243 } 244 245 public static boolean hasScheme(String iri) { 246 return iri.indexOf(':') > 0; 247 } 248 249 private static boolean hasScheme(URI uri) { 250 return !Strings.isNullOrEmpty(uri.getScheme()); 251 } 252 253 static String getSchemeSpecificPart(String iri) { 254 if (iri == null) return ""; 255 var p = iri.indexOf(':'); 256 if (p == -1) return ""; 257 return iri.substring(p + 1); 258 } 259 260 /** 261 * Get the "path"-like component of any URI. Similar to {@link URI#getPath()}, but also works 262 * e.g. for "non-standard" relative "file:hello.txt" URIs, and correctly chops off query 263 * arguments and fragments. 264 * 265 * <p>TODO This does not yet decode correctly! :=(( 266 * 267 * <p>TODO Is this really required?! Re-review which tests URI#getPath() fails, and why... 268 */ 269 @Deprecated // Get rid of this, it's stupid, buggy, and bad. 270 // TODO This doesn't correctly handle URIs with an authority! 271 public static String getPath(URI uri) { 272 return chopFragmentAndQuery(uri.getSchemeSpecificPart()); 273 } 274 275 public static String getPath(String uri) { 276 return chopFragmentAndQuery(getSchemeSpecificPart(uri)); 277 } 278 279 private static String chopFragmentAndQuery(String ssp) { 280 var chop = ssp.indexOf('?'); 281 282 // Handle Glob URIs, like e.g. "file:/tmp//?.txt" 283 if (ssp.indexOf('=', chop) == -1) chop = -1; 284 285 if (chop == -1) chop = ssp.indexOf('#'); 286 if (chop == -1) chop = ssp.length(); 287 288 return ssp.substring(0, chop); 289 } 290 291 private static String chopLastSlash(String path) { 292 if (!path.endsWith("/")) return path; 293 else return path.substring(0, path.length() - 1); 294 } 295 296 /** 297 * Extracts the "file name" from an URI, or the empty string if there is none. The filename is 298 * simply the last part of the path of the URI. It COULD be a directory! Works for file: http: 299 * and other even for "weird" URIs, such as those from Classpath URLs. 300 * 301 * <p>See also {@link com.google.common.io.Files#getFileExtension(String)} and @{@link 302 * com.google.common.io.Files#getNameWithoutExtension(String)}. 303 */ 304 public static String getFilename(URI uri) { 305 return getLastPathSegmentOrHost(uri, false); 306 } 307 308 public static String getFilenameWithoutExtension(URI uri, String... extensions) { 309 var fileName = getFilename(uri); 310 for (String extension : extensions) { 311 if (fileName.endsWith(extension)) 312 return fileName.substring(0, fileName.length() - extension.length()); 313 } 314 return Files.getNameWithoutExtension(fileName); 315 } 316 317 public static boolean hasExtension(URI uri, String... extensions) { 318 var path = uri.getPath(); 319 if (path == null) return false; 320 for (String extension : extensions) if (path.endsWith(extension)) return true; 321 return false; 322 } 323 324 public static String getFilenameOrLastPathSegmentOrHost(URI uri) { 325 return getLastPathSegmentOrHost(uri, true); 326 } 327 328 private static String getLastPathSegmentOrHost(URI uri, boolean evenIfSlashed) { 329 String path; 330 var scheme = uri.getScheme(); 331 if ("file".equals(scheme)) { 332 path = chopFragmentAndQuery(uri.getPath()); 333 } else if ("jar".equals(scheme)) { 334 return chopFragmentAndQuery( 335 getFilename(java.net.URI.create(uri.getSchemeSpecificPart()))); 336 } else if ("http".equals(scheme) || "https".equals(scheme)) { 337 path = chopFragmentAndQuery(uri.getPath()); 338 } else { 339 path = getPath(uri); 340 } 341 342 if (!evenIfSlashed && (Strings.isNullOrEmpty(path) || path.endsWith("/"))) { 343 return ""; 344 } 345 if (evenIfSlashed && (Strings.isNullOrEmpty(path) || "/".equals(path))) { 346 var host = uri.getHost(); 347 return host; 348 } 349 350 int p; 351 if (!path.endsWith("/")) p = path.lastIndexOf('/'); 352 else p = path.substring(0, path.length() - 1).lastIndexOf('/'); 353 354 return p > -1 ? chopLastSlash(path.substring(p + 1)) : chopLastSlash(path); 355 } 356 357 /** 358 * This converts e.g. "file:relative.txt" to "file:///tmp/.../relative.txt" (depending on the 359 * current working directory, obviously). It loses any query parameters and fragments. 360 */ 361 @Deprecated // TODO Get rid of the support for the fake "file:relative.txt" syntax 362 public static URI rel2abs(URI uri) { 363 if (uri == null) return uri; 364 if (!"file".equals(uri.getScheme())) return uri; 365 if (!uri.isOpaque()) return uri; 366 367 String relativePath = uri.getSchemeSpecificPart(); 368 return Path.of(relativePath).toAbsolutePath().toUri(); 369 } 370 371 public static URI absolutify(URI uri) { 372 if (hasScheme(uri)) return uri; 373 var base = TLC.optional(ContextKeys.BASE).orElseThrow(ex(uri)); 374 return base.resolve(uri); 375 } 376 377 public static String absolutify(String uri) { 378 if (hasScheme(uri)) return uri; 379 var base = TLC.optional(ContextKeys.BASE).orElseThrow(ex(uri)); 380 return resolve(base.toString(), uri); 381 } 382 383 private static String resolve(String base, String child) { 384 if (!child.startsWith("/")) return base + child; 385 return getScheme(base) + "://" + child; 386 } 387 388 private static Supplier<IllegalStateException> ex(Object uri) { 389 return () -> 390 new IllegalStateException("Missing ContextKeys.BASE on TLC to resolve: " + uri); 391 } 392 393 public static URI dropQueryAndFragment(URI uri) { 394 if (uri.getRawQuery() == null && uri.getRawFragment() == null) return uri; 395 try { 396 return new URI( 397 uri.getScheme(), 398 uri.getUserInfo(), 399 uri.getHost(), 400 uri.getPort(), 401 uri.getPath(), 402 null, 403 null); 404 } catch (URISyntaxException e) { 405 throw new IllegalArgumentException("TODO FIXME" + uri, e); 406 } 407 } 408 409 public static String dropQueryAndFragment(String uri) { 410 return dropQueryAndFragment(URI.create(uri)).toString(); 411 } 412 413 public static URI getBase(URI uri) throws IOException { 414 try { 415 var path = uri.getPath(); 416 if (path == null) 417 // throw new IllegalArgumentException("Cannot get Path from: " + uri); 418 return URI.create(""); 419 if (path.endsWith("/")) 420 if (uri.getQuery() == null && uri.getFragment() == null) return uri; 421 // NB: Both query and fragment are *intentionally* dropped in this case! 422 else return new URI(uri.getScheme(), uri.getAuthority(), path, null); 423 424 path = path.substring(0, path.lastIndexOf('/')); 425 // Again, same as above, we're ignoring query & fragment 426 return new URI(uri.getScheme(), uri.getAuthority(), path, null); 427 } catch (URISyntaxException e) { 428 throw new IOException("TODO Why is this new URI invalid: " + uri, e); 429 } 430 } 431 432 public static URI addFragment(URI uri, String fragment) { 433 if (Strings.isNullOrEmpty(fragment)) return uri; 434 try { 435 return new URI( 436 uri.getScheme(), 437 uri.getAuthority(), 438 uri.getPath(), 439 uri.getQuery(), 440 (uri.getFragment() != null ? uri.getFragment() : "") + fragment); 441 } catch (URISyntaxException e) { 442 throw new IllegalArgumentException("Failed to addFragment", e); 443 } 444 } 445 446 // NB: There is intentionally no URI create(String uri) method here! 447 // Do simply use {@link URI#create(String)} or {@link URI#URI(String)}. 448 // If you find that it does not correctly set the URI's Query, just use 449 // "scheme:/thing?ping=pong=pang#fragment" instead of "scheme:thing?ping=pong=pang#fragment". 450 /* 451 public static URI create(String uri) { 452 String scheme = getScheme(uri); 453 String authority = null; 454 String path = getPath(uri); 455 String query = getQueryString(uri); 456 String fragment = getFragment(uri); 457 try { 458 // This fails if path doesn't start with '/' so this entire method is pointless! 459 return new URI(scheme, authority, path, query, fragment); 460 } catch (URISyntaxException e) { 461 throw new IllegalArgumentException("Invalid Syntax: " + uri, e); 462 } 463 } 464 465 private static String getFragment(String uri) { 466 return uri.lastIndexOf('#') == -1 ? uri : uri.substring(uri.lastIndexOf('#') + 1); 467 } 468 */ 469 470 // TODO Review if getScheme(), getPath(), getQueryString(), getFragment() are *REALLY* needed?! 471 472 @Deprecated // TODO This automagic " " => %20 is fishy... who needs this, why?! 473 public static String encode(String uri) { 474 return uri.replace(" ", "%20"); 475 } 476 477 public static URI parse(String uri) throws URISyntaxException { 478 return new URI(encode(uri)); 479 } 480 481 private URIs() {} 482 483 public enum ContextKeys implements Context.Key<URI> { 484 485 /** 486 * Base URI. Used to resolve <a 487 * href="https://en.wikipedia.org/wiki/Uniform_Resource_Identifier#URI_references">relative 488 * reference URIs</a> which don't have a scheme. 489 */ 490 // TODO Support for this is new and emerging, and not yet fully comprehensive... 491 BASE 492 } 493}