1 /* 2 * Archttp - A highly performant web framework written in D. 3 * 4 * Copyright (C) 2021-2022 Kerisy.com 5 * 6 * Website: https://www.kerisy.com 7 * 8 * Licensed under the Apache-2.0 License. 9 * 10 */ 11 12 module archttp.Url; 13 14 // THanks dhasenan, Copy from https://github.com/dhasenan/urld 15 16 import std.conv; 17 import std.string; 18 19 pure: 20 @safe: 21 22 /// An exception thrown when something bad happens with Urls. 23 class UrlException : Exception 24 { 25 this(string msg) pure { super(msg); } 26 } 27 28 /** 29 * A mapping from schemes to their default ports. 30 * 31 * This is not exhaustive. Not all schemes use ports. Not all schemes uniquely identify a port to 32 * use even if they use ports. Entries here should be treated as best guesses. 33 */ 34 enum ushort[string] schemeToDefaultPort = [ 35 "aaa": 3868, 36 "aaas": 5658, 37 "acap": 674, 38 "amqp": 5672, 39 "cap": 1026, 40 "coap": 5683, 41 "coaps": 5684, 42 "dav": 443, 43 "dict": 2628, 44 "ftp": 21, 45 "git": 9418, 46 "go": 1096, 47 "gopher": 70, 48 "http": 80, 49 "https": 443, 50 "ws": 80, 51 "wss": 443, 52 "iac": 4569, 53 "icap": 1344, 54 "imap": 143, 55 "ipp": 631, 56 "ipps": 631, // yes, they're both mapped to port 631 57 "irc": 6667, // De facto default port, not the IANA reserved port. 
58 "ircs": 6697, 59 "iris": 702, // defaults to iris.beep 60 "iris.beep": 702, 61 "iris.lwz": 715, 62 "iris.xpc": 713, 63 "iris.xpcs": 714, 64 "jabber": 5222, // client-to-server 65 "ldap": 389, 66 "ldaps": 636, 67 "msrp": 2855, 68 "msrps": 2855, 69 "mtqp": 1038, 70 "mupdate": 3905, 71 "news": 119, 72 "nfs": 2049, 73 "pop": 110, 74 "redis": 6379, 75 "reload": 6084, 76 "rsync": 873, 77 "rtmfp": 1935, 78 "rtsp": 554, 79 "shttp": 80, 80 "sieve": 4190, 81 "sip": 5060, 82 "sips": 5061, 83 "smb": 445, 84 "smtp": 25, 85 "snews": 563, 86 "snmp": 161, 87 "soap.beep": 605, 88 "ssh": 22, 89 "stun": 3478, 90 "stuns": 5349, 91 "svn": 3690, 92 "teamspeak": 9987, 93 "telnet": 23, 94 "tftp": 69, 95 "tip": 3372, 96 ]; 97 98 /** 99 * A collection of query parameters. 100 * 101 * This is effectively a multimap of string -> strings. 102 */ 103 struct QueryParams 104 { 105 hash_t toHash() const nothrow @safe 106 { 107 return typeid(params).getHash(¶ms); 108 } 109 110 pure: 111 import std.typecons; 112 alias Tuple!(string, "key", string, "value") Param; 113 Param[] params; 114 115 @property size_t length() const { 116 return params.length; 117 } 118 119 /// Get a range over the query parameter values for the given key. 120 auto opIndex(string key) const 121 { 122 import std.algorithm.searching : find; 123 import std.algorithm.iteration : map; 124 return params.find!(x => x.key == key).map!(x => x.value); 125 } 126 127 /// Add a query parameter with the given key and value. 128 /// If one already exists, there will now be two query parameters with the given name. 129 void add(string key, string value) { 130 params ~= Param(key, value); 131 } 132 133 /// Add a query parameter with the given key and value. 134 /// If there are any existing parameters with the same key, they are removed and overwritten. 
135 void overwrite(string key, string value) { 136 for (int i = 0; i < params.length; i++) { 137 if (params[i].key == key) { 138 params[i] = params[$-1]; 139 params.length--; 140 } 141 } 142 params ~= Param(key, value); 143 } 144 145 private struct QueryParamRange 146 { 147 pure: 148 size_t i; 149 const(Param)[] params; 150 bool empty() { return i >= params.length; } 151 void popFront() { i++; } 152 Param front() { return params[i]; } 153 } 154 155 /** 156 * A range over the query parameters. 157 * 158 * Usage: 159 * --- 160 * foreach (key, value; url.queryParams) {} 161 * --- 162 */ 163 auto range() const 164 { 165 return QueryParamRange(0, this.params); 166 } 167 /// ditto 168 alias range this; 169 170 /// Convert this set of query parameters into a query string. 171 string toString() const { 172 import std.array : Appender; 173 Appender!string s; 174 bool first = true; 175 foreach (tuple; this) { 176 if (!first) { 177 s ~= '&'; 178 } 179 first = false; 180 s ~= tuple.key.percentEncode; 181 if (tuple.value.length > 0) { 182 s ~= '='; 183 s ~= tuple.value.percentEncode; 184 } 185 } 186 return s.data; 187 } 188 189 /// Clone this set of query parameters. 190 QueryParams dup() 191 { 192 QueryParams other = this; 193 other.params = params.dup; 194 return other; 195 } 196 197 int opCmp(const ref QueryParams other) const 198 { 199 for (int i = 0; i < params.length && i < other.params.length; i++) 200 { 201 auto c = cmp(params[i].key, other.params[i].key); 202 if (c != 0) return c; 203 c = cmp(params[i].value, other.params[i].value); 204 if (c != 0) return c; 205 } 206 if (params.length > other.params.length) return 1; 207 if (params.length < other.params.length) return -1; 208 return 0; 209 } 210 } 211 212 /** 213 * A Unique Resource Locator. 214 * 215 * Urls can be parsed (see parseUrl) and implicitly convert to strings. 
216 */ 217 struct Url 218 { 219 private 220 { 221 bool _isValid = false; 222 } 223 224 hash_t toHash() const @safe nothrow 225 { 226 return asTuple().toHash(); 227 } 228 229 this(string url) 230 { 231 if (this.parse(url)) 232 { 233 _isValid = true; 234 } 235 else 236 { 237 throw new UrlException("failed to parse Url " ~ url); 238 } 239 } 240 241 bool isValid() 242 { 243 return _isValid; 244 } 245 246 /** 247 * Parse a Url from a string. 248 * 249 * This attempts to parse a wide range of Urls as people might actually type them. Some mistakes 250 * may be made. However, any Url in a correct format will be parsed correctly. 251 */ 252 private bool parse(string value) 253 { 254 // scheme:[//[user:password@]host[:port]][/]path[?query][#fragment] 255 // Scheme is optional in common use. We infer 'http' if it's not given. 256 auto i = value.indexOf("//"); 257 if (i > -1) { 258 if (i > 1) { 259 this.scheme = value[0..i-1]; 260 } 261 value = value[i+2 .. $]; 262 } else { 263 this.scheme = "http"; 264 } 265 // Check for an ipv6 hostname. 266 // [user:password@]host[:port]][/]path[?query][#fragment 267 i = value.indexOfAny([':', '/', '[']); 268 if (i == -1) { 269 // Just a hostname. 270 this.host = value.fromPuny; 271 return true; 272 } 273 274 if (value[i] == ':') { 275 // This could be between username and password, or it could be between host and port. 276 auto j = value.indexOfAny(['@', '/']); 277 if (j > -1 && value[j] == '@') { 278 try { 279 this.user = value[0..i].percentDecode; 280 this.pass = value[i+1 .. j].percentDecode; 281 } catch (UrlException) { 282 return false; 283 } 284 value = value[j+1 .. $]; 285 } 286 } 287 288 // It's trying to be a host/port, not a user/pass. 289 i = value.indexOfAny([':', '/', '[']); 290 if (i == -1) { 291 this.host = value.fromPuny; 292 return true; 293 } 294 295 // Find the hostname. It's either an ipv6 address (which has special rules) or not (which doesn't 296 // have special rules). 
-- The main sticking point is that ipv6 addresses have colons, which we 297 // handle specially, and are offset with square brackets. 298 if (value[i] == '[') { 299 auto j = value[i..$].indexOf(']'); 300 if (j < 0) { 301 // unterminated ipv6 addr 302 return false; 303 } 304 // includes square brackets 305 this.host = value[i .. i+j+1]; 306 value = value[i+j+1 .. $]; 307 if (value.length == 0) { 308 // read to end of string; we finished parse 309 return true; 310 } 311 if (value[0] != ':' && value[0] != '?' && value[0] != '/') { 312 return false; 313 } 314 } else { 315 // Normal host. 316 this.host = value[0..i].fromPuny; 317 value = value[i .. $]; 318 } 319 320 if (value[0] == ':') { 321 auto end = value.indexOf('/'); 322 if (end == -1) { 323 end = value.length; 324 } 325 try { 326 this.port = value[1 .. end].to!ushort; 327 } catch (ConvException) { 328 return false; 329 } 330 value = value[end .. $]; 331 if (value.length == 0) { 332 return true; 333 } 334 } 335 336 return parsePathAndQuery(value); 337 } 338 339 private bool parsePathAndQuery(string value) 340 { 341 auto i = value.indexOfAny("?#"); 342 if (i == -1) 343 { 344 this.path = value.percentDecode; 345 return true; 346 } 347 348 try 349 { 350 this.path = value[0..i].percentDecode; 351 } 352 catch (UrlException) 353 { 354 return false; 355 } 356 357 auto c = value[i]; 358 value = value[i + 1 .. $]; 359 if (c == '?') 360 { 361 i = value.indexOf('#'); 362 string query; 363 if (i < 0) 364 { 365 query = value; 366 value = null; 367 } 368 else 369 { 370 query = value[0..i]; 371 value = value[i + 1 .. $]; 372 } 373 auto queries = query.split('&'); 374 foreach (q; queries) 375 { 376 auto j = q.indexOf('='); 377 string key, val; 378 if (j < 0) 379 { 380 key = q; 381 } 382 else 383 { 384 key = q[0..j]; 385 val = q[j + 1 .. 
$]; 386 } 387 try 388 { 389 key = key.percentDecode; 390 val = val.percentDecode; 391 } 392 catch (UrlException) 393 { 394 return false; 395 } 396 this.queryParams.add(key, val); 397 } 398 } 399 400 try 401 { 402 this.fragment = value.percentDecode; 403 } 404 catch (UrlException) 405 { 406 return false; 407 } 408 409 return true; 410 } 411 412 pure: 413 /// The Url scheme. For instance, ssh, ftp, or https. 414 string scheme; 415 416 /// The username in this Url. Usually absent. If present, there will also be a password. 417 string user; 418 419 /// The password in this Url. Usually absent. 420 string pass; 421 422 /// The hostname. 423 string host; 424 425 /** 426 * The port. 427 * 428 * This is inferred from the scheme if it isn't present in the Url itself. 429 * If the scheme is not known and the port is not present, the port will be given as 0. 430 * For some schemes, port will not be sensible -- for instance, file or chrome-extension. 431 * 432 * If you explicitly need to detect whether the user provided a port, check the providedPort 433 * field. 434 */ 435 @property ushort port() const nothrow 436 { 437 if (providedPort != 0) { 438 return providedPort; 439 } 440 if (auto p = scheme in schemeToDefaultPort) { 441 return *p; 442 } 443 return 0; 444 } 445 446 /** 447 * Set the port. 448 * 449 * This sets the providedPort field and is provided for convenience. 450 */ 451 @property ushort port(ushort value) nothrow 452 { 453 return providedPort = value; 454 } 455 456 /// The port that was explicitly provided in the Url. 457 ushort providedPort; 458 459 /** 460 * The path. 461 * 462 * For instance, in the Url https://cnn.com/news/story/17774?visited=false, the path is 463 * "/news/story/17774". 464 */ 465 string path; 466 467 /** 468 * The query parameters associated with this Url. 469 */ 470 QueryParams queryParams; 471 472 /** 473 * The fragment. In web documents, this typically refers to an anchor element. 
474 * For instance, in the Url https://cnn.com/news/story/17774#header2, the fragment is "header2". 475 */ 476 string fragment; 477 478 /** 479 * Convert this Url to a string. 480 * The string is properly formatted and usable for, eg, a web request. 481 */ 482 string toString() const 483 { 484 return toString(false); 485 } 486 487 /** 488 * Convert this Url to a string. 489 * 490 * The string is intended to be human-readable rather than machine-readable. 491 */ 492 string toHumanReadableString() const 493 { 494 return toString(true); 495 } 496 497 /// 498 unittest 499 { 500 auto url = "https://xn--m3h.xn--n3h.org/?hi=bye".parseUrl; 501 assert(url.toString == "https://xn--m3h.xn--n3h.org/?hi=bye", url.toString); 502 assert(url.toHumanReadableString == "https://☂.☃.org/?hi=bye", url.toString); 503 } 504 505 unittest 506 { 507 assert("http://example.org/some_path".parseUrl.toHumanReadableString == 508 "http://example.org/some_path"); 509 } 510 511 /** 512 * Convert the path and query string of this Url to a string. 513 */ 514 string toPathAndQueryString() const 515 { 516 if (queryParams.length > 0) 517 { 518 return path ~ '?' ~ queryParams.toString; 519 } 520 return path; 521 } 522 523 /// 524 unittest 525 { 526 auto u = "http://example.org/index?page=12".parseUrl; 527 auto pathAndQuery = u.toPathAndQueryString(); 528 assert(pathAndQuery == "/index?page=12", pathAndQuery); 529 } 530 531 private string toString(bool humanReadable) const 532 { 533 import std.array : Appender; 534 Appender!string s; 535 s ~= scheme; 536 s ~= "://"; 537 if (user) { 538 s ~= humanReadable ? user : user.percentEncode; 539 s ~= ":"; 540 s ~= humanReadable ? pass : pass.percentEncode; 541 s ~= "@"; 542 } 543 s ~= humanReadable ? 
host : host.toPuny; 544 if (providedPort) { 545 if ((scheme in schemeToDefaultPort) == null || schemeToDefaultPort[scheme] != providedPort) { 546 s ~= ":"; 547 s ~= providedPort.to!string; 548 } 549 } 550 string p = path; 551 if (p.length == 0 || p == "/") { 552 s ~= '/'; 553 } else { 554 if (humanReadable) { 555 s ~= p; 556 } else { 557 if (p[0] == '/') { 558 p = p[1..$]; 559 } 560 foreach (part; p.split('/')) { 561 s ~= '/'; 562 s ~= part.percentEncode; 563 } 564 } 565 } 566 if (queryParams.length) { 567 s ~= '?'; 568 s ~= queryParams.toString; 569 } if (fragment) { 570 s ~= '#'; 571 s ~= fragment.percentEncode; 572 } 573 return s.data; 574 } 575 576 /// Implicitly convert Urls to strings. 577 alias toString this; 578 579 /** 580 Compare two Urls. 581 582 I tried to make the comparison produce a sort order that seems natural, so it's not identical 583 to sorting based on .toString(). For instance, username/password have lower priority than 584 host. The scheme has higher priority than port but lower than host. 585 586 While the output of this is guaranteed to provide a total ordering, and I've attempted to make 587 it human-friendly, it isn't guaranteed to be consistent between versions. The implementation 588 and its results can change without a minor version increase. 589 */ 590 int opCmp(const Url other) const 591 { 592 return asTuple.opCmp(other.asTuple); 593 } 594 595 private auto asTuple() const nothrow 596 { 597 import std.typecons : tuple; 598 return tuple(host, scheme, port, user, pass, path, queryParams); 599 } 600 601 /// Equality checks. 
602 // bool opEquals(string other) const 603 // { 604 // Url o = parseUrl(other); 605 // if (!parseUrl(other)) 606 // { 607 // return false; 608 // } 609 610 // return asTuple() == o.asTuple(); 611 // } 612 613 /// Ditto 614 bool opEquals(ref const Url other) const 615 { 616 return asTuple() == other.asTuple(); 617 } 618 619 /// Ditto 620 bool opEquals(const Url other) const 621 { 622 return asTuple() == other.asTuple(); 623 } 624 625 unittest 626 { 627 import std.algorithm, std.array, std.format; 628 assert("http://example.org/some_path".parseUrl > "http://example.org/other_path".parseUrl); 629 alias sorted = std.algorithm.sort; 630 auto parsedUrls = 631 [ 632 "http://example.org/some_path", 633 "http://example.org:81/other_path", 634 "http://example.org/other_path", 635 "https://example.org/first_path", 636 "http://example.xyz/other_other_path", 637 "http://me:secret@blog.ikeran.org/wp_admin", 638 ].map!(x => x.parseUrl).array; 639 auto urls = sorted(parsedUrls).map!(x => x.toHumanReadableString).array; 640 auto expected = 641 [ 642 "http://me:secret@blog.ikeran.org/wp_admin", 643 "http://example.org/other_path", 644 "http://example.org/some_path", 645 "http://example.org:81/other_path", 646 "https://example.org/first_path", 647 "http://example.xyz/other_other_path", 648 ]; 649 assert(cmp(urls, expected) == 0, "expected:\n%s\ngot:\n%s".format(expected, urls)); 650 } 651 652 unittest 653 { 654 auto a = "http://x.org/a?b=c".parseUrl; 655 auto b = "http://x.org/a?d=e".parseUrl; 656 auto c = "http://x.org/a?b=a".parseUrl; 657 assert(a < b); 658 assert(c < b); 659 assert(c < a); 660 } 661 662 /** 663 * The append operator (~). 664 * 665 * The append operator for Urls returns a new Url with the given string appended as a path 666 * element to the Url's path. It only adds new path elements (or sequences of path elements). 667 * 668 * Don't worry about path separators; whether you include them or not, it will just work. 669 * 670 * Query elements are copied. 
671 * 672 * Examples: 673 * --- 674 * auto random = "http://testdata.org/random".parseUrl; 675 * auto randInt = random ~ "int"; 676 * writeln(randInt); // prints "http://testdata.org/random/int" 677 * --- 678 */ 679 Url opBinary(string op : "~")(string subsequentPath) { 680 Url other = this; 681 other ~= subsequentPath; 682 other.queryParams = queryParams.dup; 683 return other; 684 } 685 686 /** 687 * The append-in-place operator (~=). 688 * 689 * The append operator for Urls adds a path element to this Url. It only adds new path elements 690 * (or sequences of path elements). 691 * 692 * Don't worry about path separators; whether you include them or not, it will just work. 693 * 694 * Examples: 695 * --- 696 * auto random = "http://testdata.org/random".parseUrl; 697 * random ~= "int"; 698 * writeln(random); // prints "http://testdata.org/random/int" 699 * --- 700 */ 701 Url opOpAssign(string op : "~")(string subsequentPath) { 702 if (path.endsWith("/")) { 703 if (subsequentPath.startsWith("/")) { 704 path ~= subsequentPath[1..$]; 705 } else { 706 path ~= subsequentPath; 707 } 708 } else { 709 if (!subsequentPath.startsWith("/")) { 710 path ~= '/'; 711 } 712 path ~= subsequentPath; 713 } 714 return this; 715 } 716 717 /** 718 * Convert a relative Url to an absolute Url. 719 * 720 * This is designed so that you can scrape a webpage and quickly convert links within the 721 * page to Urls you can actually work with, but you're clever; I'm sure you'll find more uses 722 * for it. 723 * 724 * It's biased toward HTTP family Urls; as one quirk, "//" is interpreted as "same scheme, 725 * different everything else", which might not be desirable for all schemes. 726 * 727 * This only handles Urls, not URIs; if you pass in 'mailto:bob.dobbs@subgenius.org', for 728 * instance, this will give you our best attempt to parse it as a Url. 
729 * 730 * Examples: 731 * --- 732 * auto base = "https://example.org/passworddb?secure=false".parseUrl; 733 * 734 * // Download https://example.org/passworddb/by-username/dhasenan 735 * download(base.resolve("by-username/dhasenan")); 736 * 737 * // Download https://example.org/static/style.css 738 * download(base.resolve("/static/style.css")); 739 * 740 * // Download https://cdn.example.net/jquery.js 741 * download(base.resolve("https://cdn.example.net/jquery.js")); 742 * --- 743 */ 744 // Url resolve(string other) 745 // { 746 // if (other.length == 0) return this; 747 // if (other[0] == '/') 748 // { 749 // if (other.length > 1 && other[1] == '/') 750 // { 751 // // Uncommon syntax: a link like "//wikimedia.org" means "same scheme, switch Url" 752 // return parseUrl(this.scheme ~ ':' ~ other); 753 // } 754 // } 755 // else 756 // { 757 // auto schemeSep = other.indexOf("://"); 758 // if (schemeSep >= 0 && schemeSep < other.indexOf("/")) 759 // // separate Url 760 // { 761 // return other.parseUrl; 762 // } 763 // } 764 765 // Url ret = this; 766 // ret.path = ""; 767 // ret.queryParams = ret.queryParams.init; 768 // if (other[0] != '/') 769 // { 770 // // relative to something 771 // if (!this.path.length) 772 // { 773 // // nothing to be relative to 774 // other = "/" ~ other; 775 // } 776 // else if (this.path[$-1] == '/') 777 // { 778 // // directory-style path for the current thing 779 // // resolve relative to this directory 780 // other = this.path ~ other; 781 // } 782 // else 783 // { 784 // // this is a file-like thing 785 // // find the 'directory' and relative to that 786 // other = this.path[0..this.path.lastIndexOf('/') + 1] ~ other; 787 // } 788 // } 789 // // collapse /foo/../ to / 790 // if (other.indexOf("/../") >= 0) 791 // { 792 // import std.array : Appender, array; 793 // import std.string : split; 794 // import std.algorithm.iteration : joiner, filter; 795 // string[] parts = other.split('/'); 796 // for (int i = 0; i < parts.length; i++) 
797 // { 798 // if (parts[i] == "..") 799 // { 800 // for (int j = i - 1; j >= 0; j--) 801 // { 802 // if (parts[j] != null) 803 // { 804 // parts[j] = null; 805 // parts[i] = null; 806 // break; 807 // } 808 // } 809 // } 810 // } 811 // other = "/" ~ parts.filter!(x => x != null).joiner("/").to!string; 812 // } 813 // parsePathAndQuery(ret, other); 814 // return ret; 815 // } 816 817 unittest 818 { 819 auto a = "http://alcyius.com/dndtools/index.html".parseUrl; 820 auto b = a.resolve("contacts/index.html"); 821 assert(b.toString == "http://alcyius.com/dndtools/contacts/index.html"); 822 } 823 824 unittest 825 { 826 auto a = "http://alcyius.com/dndtools/index.html?a=b".parseUrl; 827 auto b = a.resolve("contacts/index.html?foo=bar"); 828 assert(b.toString == "http://alcyius.com/dndtools/contacts/index.html?foo=bar"); 829 } 830 831 unittest 832 { 833 auto a = "http://alcyius.com/dndtools/index.html".parseUrl; 834 auto b = a.resolve("../index.html"); 835 assert(b.toString == "http://alcyius.com/index.html", b.toString); 836 } 837 838 unittest 839 { 840 auto a = "http://alcyius.com/dndtools/foo/bar/index.html".parseUrl; 841 auto b = a.resolve("../index.html"); 842 assert(b.toString == "http://alcyius.com/dndtools/foo/index.html", b.toString); 843 } 844 } 845 846 unittest { 847 { 848 // Basic. 849 Url url; 850 with (url) { 851 scheme = "https"; 852 host = "example.org"; 853 path = "/foo/bar"; 854 queryParams.add("hello", "world"); 855 queryParams.add("gibe", "clay"); 856 fragment = "frag"; 857 } 858 assert( 859 // Not sure what order it'll come out in. 860 url.toString == "https://example.org/foo/bar?hello=world&gibe=clay#frag" || 861 url.toString == "https://example.org/foo/bar?gibe=clay&hello=world#frag", 862 url.toString); 863 } 864 { 865 // Percent encoded. 
866 Url url; 867 with (url) { 868 scheme = "https"; 869 host = "example.org"; 870 path = "/f☃o"; 871 queryParams.add("❄", "❀"); 872 queryParams.add("[", "]"); 873 fragment = "ş"; 874 } 875 assert( 876 // Not sure what order it'll come out in. 877 url.toString == "https://example.org/f%E2%98%83o?%E2%9D%84=%E2%9D%80&%5B=%5D#%C5%9F" || 878 url.toString == "https://example.org/f%E2%98%83o?%5B=%5D&%E2%9D%84=%E2%9D%80#%C5%9F", 879 url.toString); 880 } 881 { 882 // Port, user, pass. 883 Url url; 884 with (url) { 885 scheme = "https"; 886 host = "example.org"; 887 user = "dhasenan"; 888 pass = "itsasecret"; 889 port = 17; 890 } 891 assert( 892 url.toString == "https://dhasenan:itsasecret@example.org:17/", 893 url.toString); 894 } 895 { 896 // Query with no path. 897 Url url; 898 with (url) { 899 scheme = "https"; 900 host = "example.org"; 901 queryParams.add("hi", "bye"); 902 } 903 assert( 904 url.toString == "https://example.org/?hi=bye", 905 url.toString); 906 } 907 } 908 909 unittest 910 { 911 auto url = "//foo/bar".parseUrl; 912 assert(url.host == "foo", "expected host foo, got " ~ url.host); 913 assert(url.path == "/bar"); 914 } 915 916 unittest 917 { 918 import std.stdio : writeln; 919 auto url = "file:///foo/bar".parseUrl; 920 assert(url.host == null); 921 assert(url.port == 0); 922 assert(url.scheme == "file"); 923 assert(url.path == "/foo/bar"); 924 assert(url.toString == "file:///foo/bar"); 925 assert(url.queryParams.empty); 926 assert(url.fragment == null); 927 } 928 929 unittest 930 { 931 // ipv6 hostnames! 
932 { 933 // full range of data 934 auto url = parseUrl("https://bob:secret@[::1]:2771/foo/bar"); 935 assert(url.scheme == "https", url.scheme); 936 assert(url.user == "bob", url.user); 937 assert(url.pass == "secret", url.pass); 938 assert(url.host == "[::1]", url.host); 939 assert(url.port == 2771, url.port.to!string); 940 assert(url.path == "/foo/bar", url.path); 941 } 942 943 // minimal 944 { 945 auto url = parseUrl("[::1]"); 946 assert(url.host == "[::1]", url.host); 947 } 948 949 // some random bits 950 { 951 auto url = parseUrl("http://[::1]/foo"); 952 assert(url.scheme == "http", url.scheme); 953 assert(url.host == "[::1]", url.host); 954 assert(url.path == "/foo", url.path); 955 } 956 957 { 958 auto url = parseUrl("https://[2001:0db8:0:0:0:0:1428:57ab]/?login=true#justkidding"); 959 assert(url.scheme == "https"); 960 assert(url.host == "[2001:0db8:0:0:0:0:1428:57ab]"); 961 assert(url.path == "/"); 962 assert(url.fragment == "justkidding"); 963 } 964 } 965 966 unittest 967 { 968 auto url = "localhost:5984".parseUrl; 969 auto url2 = url ~ "db1"; 970 assert(url2.toString == "http://localhost:5984/db1", url2.toString); 971 auto url3 = url2 ~ "_all_docs"; 972 assert(url3.toString == "http://localhost:5984/db1/_all_docs", url3.toString); 973 } 974 975 /// 976 unittest { 977 { 978 // Basic. 979 Url url; 980 with (url) { 981 scheme = "https"; 982 host = "example.org"; 983 path = "/foo/bar"; 984 queryParams.add("hello", "world"); 985 queryParams.add("gibe", "clay"); 986 fragment = "frag"; 987 } 988 assert( 989 // Not sure what order it'll come out in. 990 url.toString == "https://example.org/foo/bar?hello=world&gibe=clay#frag" || 991 url.toString == "https://example.org/foo/bar?gibe=clay&hello=world#frag", 992 url.toString); 993 } 994 { 995 // Passing an array of query values. 
996 Url url; 997 with (url) { 998 scheme = "https"; 999 host = "example.org"; 1000 path = "/foo/bar"; 1001 queryParams.add("hello", "world"); 1002 queryParams.add("hello", "aether"); 1003 fragment = "frag"; 1004 } 1005 assert( 1006 // Not sure what order it'll come out in. 1007 url.toString == "https://example.org/foo/bar?hello=world&hello=aether#frag" || 1008 url.toString == "https://example.org/foo/bar?hello=aether&hello=world#frag", 1009 url.toString); 1010 } 1011 { 1012 // Percent encoded. 1013 Url url; 1014 with (url) { 1015 scheme = "https"; 1016 host = "example.org"; 1017 path = "/f☃o"; 1018 queryParams.add("❄", "❀"); 1019 queryParams.add("[", "]"); 1020 fragment = "ş"; 1021 } 1022 assert( 1023 // Not sure what order it'll come out in. 1024 url.toString == "https://example.org/f%E2%98%83o?%E2%9D%84=%E2%9D%80&%5B=%5D#%C5%9F" || 1025 url.toString == "https://example.org/f%E2%98%83o?%5B=%5D&%E2%9D%84=%E2%9D%80#%C5%9F", 1026 url.toString); 1027 } 1028 { 1029 // Port, user, pass. 1030 Url url; 1031 with (url) { 1032 scheme = "https"; 1033 host = "example.org"; 1034 user = "dhasenan"; 1035 pass = "itsasecret"; 1036 port = 17; 1037 } 1038 assert( 1039 url.toString == "https://dhasenan:itsasecret@example.org:17/", 1040 url.toString); 1041 } 1042 { 1043 // Query with no path. 1044 Url url; 1045 with (url) { 1046 scheme = "https"; 1047 host = "example.org"; 1048 queryParams.add("hi", "bye"); 1049 } 1050 assert( 1051 url.toString == "https://example.org/?hi=bye", 1052 url.toString); 1053 } 1054 } 1055 1056 unittest { 1057 // Percent decoding. 
1058 1059 // http://#:!:@ 1060 auto urlString = "http://%23:%21%3A@example.org/%7B/%7D?%3B&%26=%3D#%23hash%EF%BF%BD"; 1061 auto url = urlString.parseUrl; 1062 assert(url.user == "#"); 1063 assert(url.pass == "!:"); 1064 assert(url.host == "example.org"); 1065 assert(url.path == "/{/}"); 1066 assert(url.queryParams[";"].front == ""); 1067 assert(url.queryParams["&"].front == "="); 1068 assert(url.fragment == "#hash�"); 1069 1070 // Round trip. 1071 assert(urlString == urlString.parseUrl.toString, urlString.parseUrl.toString); 1072 assert(urlString == urlString.parseUrl.toString.parseUrl.toString); 1073 } 1074 1075 unittest { 1076 auto url = "https://xn--m3h.xn--n3h.org/?hi=bye".parseUrl; 1077 assert(url.host == "☂.☃.org", url.host); 1078 } 1079 1080 unittest { 1081 auto url = "https://☂.☃.org/?hi=bye".parseUrl; 1082 assert(url.toString == "https://xn--m3h.xn--n3h.org/?hi=bye"); 1083 } 1084 1085 /// 1086 unittest { 1087 // There's an existing path. 1088 auto url = parseUrl("http://example.org/foo"); 1089 Url url2; 1090 // No slash? Assume it needs a slash. 1091 assert((url ~ "bar").toString == "http://example.org/foo/bar"); 1092 // With slash? Don't add another. 1093 url2 = url ~ "/bar"; 1094 assert(url2.toString == "http://example.org/foo/bar", url2.toString); 1095 url ~= "bar"; 1096 assert(url.toString == "http://example.org/foo/bar"); 1097 1098 // Path already ends with a slash; don't add another. 1099 url = parseUrl("http://example.org/foo/"); 1100 assert((url ~ "bar").toString == "http://example.org/foo/bar"); 1101 // Still don't add one even if you're appending with a slash. 1102 assert((url ~ "/bar").toString == "http://example.org/foo/bar"); 1103 url ~= "/bar"; 1104 assert(url.toString == "http://example.org/foo/bar"); 1105 1106 // No path. 
1107 url = parseUrl("http://example.org"); 1108 assert((url ~ "bar").toString == "http://example.org/bar"); 1109 assert((url ~ "/bar").toString == "http://example.org/bar"); 1110 url ~= "bar"; 1111 assert(url.toString == "http://example.org/bar"); 1112 1113 // Path is just a slash. 1114 url = parseUrl("http://example.org/"); 1115 assert((url ~ "bar").toString == "http://example.org/bar"); 1116 assert((url ~ "/bar").toString == "http://example.org/bar"); 1117 url ~= "bar"; 1118 assert(url.toString == "http://example.org/bar", url.toString); 1119 1120 // No path, just fragment. 1121 url = "ircs://irc.freenode.com/#d".parseUrl; 1122 assert(url.toString == "ircs://irc.freenode.com/#d", url.toString); 1123 } 1124 unittest 1125 { 1126 // basic resolve() 1127 { 1128 auto base = "https://example.org/this/".parseUrl; 1129 assert(base.resolve("that") == "https://example.org/this/that"); 1130 assert(base.resolve("/that") == "https://example.org/that"); 1131 assert(base.resolve("//example.net/that") == "https://example.net/that"); 1132 } 1133 1134 // ensure we don't preserve query params 1135 { 1136 auto base = "https://example.org/this?query=value&other=value2".parseUrl; 1137 assert(base.resolve("that") == "https://example.org/that"); 1138 assert(base.resolve("/that") == "https://example.org/that"); 1139 assert(base.resolve("tother/that") == "https://example.org/tother/that"); 1140 assert(base.resolve("//example.net/that") == "https://example.net/that"); 1141 } 1142 } 1143 1144 1145 unittest 1146 { 1147 import std.net.curl; 1148 auto url = "http://example.org".parseUrl; 1149 assert(is(typeof(std.net.curl.get(url)))); 1150 } 1151 1152 /** 1153 * Parse the input string as a Url. 1154 * 1155 * Throws: 1156 * UrlException if the string was in an incorrect format. 
*/
// Url parseUrl(string value) {
//     return Url(value);
// }

///
unittest {
    // Bare host name: the scheme comes back as "http" and no port was supplied.
    {
        auto bare = parseUrl("example.org");
        assert(bare.scheme == "http");
        assert(bare.host == "example.org");
        assert(bare.path == "");
        assert(bare.port == 80);
        assert(bare.providedPort == 0);
        assert(bare.fragment == "");
    }
    // Explicit scheme, host only.
    {
        auto secure = parseUrl("https://example.org");
        assert(secure.scheme == "https");
        assert(secure.host == "example.org");
        assert(secure.path == "");
        assert(secure.port == 443);
        assert(secure.providedPort == 0);
    }
    // Host followed by a path.
    {
        auto withPath = parseUrl("https://example.org/foo/bar");
        assert(withPath.scheme == "https");
        assert(withPath.host == "example.org");
        assert(withPath.path == "/foo/bar", "expected /foo/bar but got " ~ withPath.path);
        assert(withPath.port == 443);
        assert(withPath.providedPort == 0);
    }
    // A port written in the Url shows up in both port and providedPort.
    {
        auto withPort = parseUrl("https://example.org:1021/foo/bar");
        assert(withPort.scheme == "https");
        assert(withPort.host == "example.org");
        assert(withPort.path == "/foo/bar", "expected /foo/bar but got " ~ withPort.path);
        assert(withPort.port == 1021);
        assert(withPort.providedPort == 1021);
    }
    // User name and password.
    {
        auto withUser = parseUrl("https://bob:secret@example.org/foo/bar");
        assert(withUser.scheme == "https");
        assert(withUser.host == "example.org");
        assert(withUser.path == "/foo/bar");
        assert(withUser.port == 443);
        assert(withUser.user == "bob");
        assert(withUser.pass == "secret");
    }
    // Percent-encoded user name and password come back decoded.
    {
        auto encodedUser = parseUrl("https://bob%21:secret%21%3F@example.org/foo/bar");
        assert(encodedUser.scheme == "https");
        assert(encodedUser.host == "example.org");
        assert(encodedUser.path == "/foo/bar");
        assert(encodedUser.port == 443);
        assert(encodedUser.user == "bob!");
        assert(encodedUser.pass == "secret!?");
    }
    // Credentials, explicit port and path together.
    {
        auto everything = parseUrl("https://bob:secret@example.org:2210/foo/bar");
        assert(everything.scheme == "https");
        assert(everything.host == "example.org");
        assert(everything.path == "/foo/bar");
        assert(everything.port == 2210);
        assert(everything.user == "bob");
        assert(everything.pass == "secret");
        assert(everything.fragment == "");
    }
    // Query string only.
    {
        auto withQuery = parseUrl("https://example.org/?login=true");
        assert(withQuery.scheme == "https");
        assert(withQuery.host == "example.org");
        assert(withQuery.path == "/", "expected path: / actual path: " ~ withQuery.path);
        assert(withQuery.queryParams["login"].front == "true");
        assert(withQuery.fragment == "");
    }
    // Query string followed by a fragment.
    {
        auto withFragment = parseUrl("https://example.org/?login=true#justkidding");
        assert(withFragment.scheme == "https");
        assert(withFragment.host == "example.org");
        assert(withFragment.path == "/", "expected path: / actual path: " ~ withFragment.path);
        assert(withFragment.queryParams["login"].front == "true");
        assert(withFragment.fragment == "justkidding");
    }
    // Percent-encoded path, query key, query value and fragment are all decoded.
    {
        auto escaped = parseUrl("https://example.org/%E2%98%83?%E2%9D%84=%3D#%5E");
        assert(escaped.scheme == "https");
        assert(escaped.host == "example.org");
        assert(escaped.path == "/☃", "expected path: /☃ actual path: " ~ escaped.path);
        assert(escaped.queryParams["❄"].front == "=");
        assert(escaped.fragment == "^");
    }
}

unittest {
    // Default port for the scheme versus a port written in the Url.
    assert(parseUrl("http://example.org").port == 80);
    assert(parseUrl("http://example.org:5326").port == 5326);

    // Scheme and credentials of a non-HTTP Url.
    auto redis = parseUrl("redis://admin:password@redisbox.local:2201/path?query=value#fragment");
    assert(redis.scheme == "redis");
    assert(redis.user == "admin");
    assert(redis.pass == "password");

    // toString leaves out a port equal to the scheme default and keeps any other port.
    assert(parseUrl("example.org").toString == "http://example.org/");
    assert(parseUrl("http://example.org:80").toString == "http://example.org/");
    assert(parseUrl("localhost:8070").toString == "http://localhost:8070/");
}

/**
 * Percent-encode a
string.
 *
 * Url components cannot contain non-ASCII characters, and there are very few characters that are
 * safe to include as Url components. Domain names using Unicode values use Punycode. For
 * everything else, there is percent encoding.
 *
 * Params:
 *   raw = the text to encode
 * Returns: raw with every character other than ASCII letters, ASCII digits and `-._~` replaced by
 *   `%XX` escapes (upper-case hex), one escape per octet of the character's UTF-8 encoding.
 */
string percentEncode(string raw) {
    import std.array : Appender;
    import std.utf : encode;

    // Letters, digits and -._~ are the only characters copied through unescaped.
    // Reserved characters such as :/?#[]@!$&'()*+,;= and everything else get escaped.
    static bool isUnreserved(dchar ch) pure nothrow @nogc @safe {
        switch (ch) {
            case 'a': .. case 'z':
            case 'A': .. case 'Z':
            case '0': .. case '9':
            case '-', '.', '_', '~':
                return true;
            default:
                return false;
        }
    }

    enum hexDigits = "0123456789ABCDEF";
    Appender!string result;
    foreach (dchar codePoint; raw) {
        if (isUnreserved(codePoint)) {
            result ~= codePoint;
            continue;
        }
        // Escape each octet of the UTF-8 form; plain ASCII yields exactly one octet.
        char[4] utf8;
        immutable octetCount = encode(utf8, codePoint);
        foreach (octet; utf8[0 .. octetCount]) {
            result ~= '%';
            result ~= hexDigits[octet >> 4];
            result ~= hexDigits[octet & 0x0F];
        }
    }
    return result.data;
}

///
unittest {
    // Strings made only of unescaped characters come back unchanged.
    foreach (untouched; ["IDontNeedNoPercentEncoding", "~~--..__", "0123456789"]) {
        assert(percentEncode(untouched) == untouched);
    }

    // A non-ASCII character is escaped octet by octet.
    immutable snowman = percentEncode("☃");
    assert(snowman == "%E2%98%83", "expected %E2%98%83 but got" ~ snowman);
}

/**
 * Percent-decode a string.
 *
 * Url components cannot contain non-ASCII characters, and there are very few characters that are
 * safe to include as Url components. Domain names using Unicode values use Punycode. For
 * everything else, there is percent encoding.
*
 * This explicitly ensures that the result is a valid UTF-8 string.
 *
 * Params:
 *   encoded = text in which each `%XX` escape stands for one raw octet
 * Returns: the decoded text
 * Throws: UrlException if an escape is malformed or the decoded octets are not valid UTF-8.
 */
string percentDecode(string encoded)
{
    import std.utf : validate, UTFException;
    auto raw = percentDecodeRaw(encoded);
    auto s = cast(string) raw;
    try
    {
        validate(s);
    }
    catch (UTFException e)
    {
        throw new UrlException(
                "The percent-encoded data `" ~ encoded ~ "` does not represent a valid UTF-8 sequence.");
    }
    return s;
}

///
unittest {
    assert(percentDecode("IDontNeedNoPercentDecoding") == "IDontNeedNoPercentDecoding");
    assert(percentDecode("~~--..__") == "~~--..__");
    assert(percentDecode("0123456789") == "0123456789");

    string e;

    e = percentDecode("%E2%98%83");
    assert(e == "☃", "expected a snowman but got" ~ e);

    e = percentDecode("%e2%98%83");
    assert(e == "☃", "expected a snowman but got" ~ e);

    try {
        // %ES is an invalid percent sequence: 'S' is not a hex digit.
        percentDecode("%es");
        assert(false, "expected exception not thrown");
    } catch (UrlException) {
    }

    try {
        percentDecode("%e");
        assert(false, "expected exception not thrown");
    } catch (UrlException) {
    }

    try {
        // A lone percent sign has no digits after it at all.
        percentDecode("%");
        assert(false, "expected exception not thrown");
    } catch (UrlException) {
    }
}

/**
 * Percent-decode a string into a ubyte array.
 *
 * Url components cannot contain non-ASCII characters, and there are very few characters that are
 * safe to include as Url components. Domain names using Unicode values use Punycode. For
 * everything else, there is percent encoding.
 *
 * This yields a ubyte array and will not perform validation on the output. However, an improperly
 * formatted input string will result in a UrlException.
 *
 * Params:
 *   encoded = text in which each `%XX` escape stands for one raw octet
 * Returns: the octets of encoded, with every escape replaced by the octet it names
 * Throws: UrlException if a `%` is not followed by exactly two hex digits.
 */
immutable(ubyte)[] percentDecodeRaw(string encoded)
{
    // We're dealing with possibly incorrectly encoded UTF-8. Mark it down as ubyte[] for now.
    import std.array : Appender;
    Appender!(immutable(ubyte)[]) app;
    for (size_t i = 0; i < encoded.length; i++) {
        if (encoded[i] != '%') {
            app ~= encoded[i];
            continue;
        }
        // i < encoded.length here, so this subtraction cannot wrap around. The former test
        // `encoded.length - 2` wrapped for inputs shorter than two bytes (e.g. "%"), let the
        // check pass and indexed past the end of the string.
        if (encoded.length - i < 3) {
            throw new UrlException("Invalid percent encoded value: expected two characters after " ~
                    "percent symbol. Error at index " ~ i.to!string);
        }
        if (isHex(encoded[i + 1]) && isHex(encoded[i + 2])) {
            auto b = fromHex(encoded[i + 1]);
            auto c = fromHex(encoded[i + 2]);
            app ~= cast(ubyte)((b << 4) | c);
        } else {
            throw new UrlException("Invalid percent encoded value: expected two hex digits after " ~
                    "percent symbol. Error at index " ~ i.to!string);
        }
        // Skip the two digits just consumed; the loop increment steps over the '%' itself.
        i += 2;
    }
    return app.data;
}

/// True when c is one of 0-9, a-f or A-F.
private bool isHex(char c) {
    return ('0' <= c && '9' >= c) ||
        ('a' <= c && 'f' >= c) ||
        ('A' <= c && 'F' >= c);
}

/// Value of the hex digit s (either case). Callers check isHex first; any other character
/// yields 255 because indexOf returns -1.
private ubyte fromHex(char s) {
    enum caseDiff = 'a' - 'A';
    if (s >= 'a' && s <= 'z') {
        // Fold lower case onto upper case so a single lookup string suffices.
        s -= caseDiff;
    }
    return cast(ubyte)("0123456789ABCDEF".indexOf(s));
}

/**
 * Convert a host name to its ASCII form, Punycode-encoding each dot-separated label when needed.
 *
 * An empty host yields "", and a host starting with '[' is returned unchanged. A host made only
 * of permitted ASCII characters is also returned unchanged.
 *
 * Throws: UrlException if an ASCII character other than a letter, a digit or one of `,-./` is
 *   found before the first non-ASCII character.
 */
private string toPuny(string unicodeHostname)
{
    if (unicodeHostname.length == 0) return "";
    if (unicodeHostname[0] == '[')
    {
        // It's an ipv6 name.
        return unicodeHostname;
    }
    bool mustEncode = false;
    foreach (i, dchar d; unicodeHostname) {
        auto c = cast(uint) d;
        if (c > 0x80) {
            mustEncode = true;
            break;
        }
        // Rejected ranges: below ',', ':' through '@', '[' through '`', and '{' upwards.
        // The second range used to end at decimal 40 instead of 0x40 and so never matched.
        if (c < 0x2C || (c >= 0x3A && c <= 0x40) || (c >= 0x5B && c <= 0x60) || (c >= 0x7B)) {
            throw new UrlException(
                    format(
                        "domain name '%s' contains illegal character '%s' at position %s",
                        unicodeHostname, d, i));
        }
    }
    if (!mustEncode) {
        return unicodeHostname;
    }
    import std.algorithm.iteration : map;
    return unicodeHostname.split('.').map!punyEncode.join(".");
}

/// Punycode-decode every dot-separated label of hostname and join the results with dots.
private string fromPuny(string hostname)
{
    import std.algorithm.iteration : map;
    return hostname.split('.').map!punyDecode.join(".");
}

private {
    // Punycode parameter values as given in RFC 3492, section 5.
    enum delimiter = '-';
    enum marker = "xn--";
    enum ulong damp = 700;
    enum ulong tmin = 1;
    enum ulong tmax = 26;
    enum ulong skew = 38;
    enum ulong base = 36;
    enum ulong initialBias = 72;
    enum dchar initialN = cast(dchar)128;

    /// Bias adaptation function of RFC 3492, section 6.1.
    ulong adapt(ulong delta, ulong numPoints, bool firstTime) {
        if (firstTime) {
            delta /= damp;
        } else {
            delta /= 2;
        }
        delta += delta / numPoints;
        ulong k = 0;
        while (delta > ((base - tmin) * tmax) / 2) {
            delta /= (base - tmin);
            k += base;
        }
        return k + (((base - tmin + 1) * delta) / (delta + skew));
    }
}

/**
 * Encode the input string using the Punycode algorithm.
 *
 * Punycode is used to encode UTF domain name segment. A Punycode-encoded segment will be marked
 * with "xn--". Each segment is encoded separately. For instance, if you wish to encode "☂.☃.com"
 * in Punycode, you will get "xn--m3h.xn--n3h.com".
 *
 * In order to puny-encode a domain name, you must split it into its components.
The following will
 * typically suffice:
 * ---
 * auto domain = "☂.☃.com";
 * auto encodedDomain = domain.splitter(".").map!(punyEncode).join(".");
 * ---
 *
 * Params:
 *   input = a single domain name segment
 * Returns: input itself when every code point is below U+0080; otherwise "xn--" followed by the
 *   Punycode form of input.
 * Throws: UrlException if the running delta exceeds uint.max.
 */
string punyEncode(string input)
{
    import std.array : Appender;
    ulong delta = 0;
    dchar n = initialN;
    auto bias = initialBias;
    Appender!string output;
    output ~= marker;
    // Counters are ulong so that the delta product below is computed in 64 bits.
    ulong pushed = 0;
    ulong codePoints = 0;
    foreach (dchar c; input) {
        codePoints++;
        // Basic code points are strictly below initialN (U+0080). Using <= here copied U+0080
        // itself into the output as if it were ASCII.
        if (c < initialN) {
            output ~= c;
            pushed++;
        }
    }
    if (pushed == codePoints) {
        // No encoding to do.
        return input;
    }
    if (pushed > 0) {
        output ~= delimiter;
    }
    bool first = true;
    while (pushed < codePoints) {
        // Smallest code point not yet handled (>= n).
        auto best = dchar.max;
        foreach (dchar c; input) {
            if (n <= c && c < best) {
                best = c;
            }
        }
        if (best == dchar.max) {
            throw new UrlException("failed to find a new codepoint to process during punyencode");
        }
        // (best - n) is 32-bit; multiplying by a ulong keeps the product from wrapping before
        // the overflow test below gets to see it.
        delta += (best - n) * (pushed + 1);
        if (delta > uint.max) {
            // TODO better error message
            throw new UrlException("overflow during punyencode");
        }
        n = best;
        foreach (dchar c; input) {
            if (c < n) {
                delta++;
            }
            if (c == n) {
                // Emit delta as a variable-length integer in base 36.
                ulong q = delta;
                auto k = base;
                while (true) {
                    ulong t;
                    if (k <= bias) {
                        t = tmin;
                    } else if (k >= bias + tmax) {
                        t = tmax;
                    } else {
                        t = k - bias;
                    }
                    if (q < t) {
                        break;
                    }
                    output ~= digitToBasic(t + ((q - t) % (base - t)));
                    q = (q - t) / (base - t);
                    k += base;
                }
                output ~= digitToBasic(q);
                pushed++;
                bias = adapt(delta, pushed, first);
                first = false;
                delta = 0;
            }
        }
        delta++;
        n++;
    }
    return output.data;
}

/**
 * Decode the input string using the Punycode algorithm.
*
 * Punycode is used to encode UTF domain name segment. A Punycode-encoded segment will be marked
 * with "xn--". Each segment is encoded separately. For instance, if you wish to encode "☂.☃.com"
 * in Punycode, you will get "xn--m3h.xn--n3h.com".
 *
 * In order to puny-decode a domain name, you must split it into its components. The following will
 * typically suffice:
 * ---
 * auto domain = "xn--m3h.xn--n3h.com";
 * auto decodedDomain = domain.splitter(".").map!(punyDecode).join(".");
 * ---
 *
 * Params:
 *   input = a single domain name segment
 * Returns: input itself when it does not start with "xn--"; otherwise the decoded segment.
 * Throws: UrlException if the encoded part ends in the middle of a number, contains a character
 *   that is not a Punycode digit, overflows, or decodes to a value that is not a valid Unicode
 *   code point.
 */
string punyDecode(string input) {
    if (!input.startsWith(marker)) {
        return input;
    }
    input = input[marker.length..$];

    // Upper bound for i and w, mirroring the "fail on overflow" steps of the algorithm.
    enum ulong maxInt = uint.max;

    // let n = initial_n
    dchar n = cast(dchar)128;

    // let i = 0
    // let bias = initial_bias
    // let output = an empty string indexed from 0
    size_t i = 0;
    auto bias = initialBias;
    dchar[] output;
    // This reserves a bit more than necessary, but it should be more efficient overall than just
    // appending and inserting volo-nolo.
    output.reserve(input.length);

    // consume all code points before the last delimiter (if there is one)
    // and copy them to output
    // if more than zero code points were consumed then consume one more
    // (which will be the last delimiter)
    auto end = input.lastIndexOf(delimiter);
    if (end > -1) {
        foreach (dchar c; input[0..end]) {
            output ~= c;
        }
        input = input[end+1 .. $];
    }

    // while the input is not exhausted do begin
    size_t pos = 0;
    while (pos < input.length) {
        // let oldi = i
        // let w = 1
        auto oldi = i;
        ulong w = 1;
        // for k = base to infinity in steps of base do begin
        for (ulong k = base; k < uint.max; k += base) {
            // consume a code point, or fail if there was none to consume
            // Without this test a truncated segment indexed past the end of the string.
            if (pos >= input.length) {
                throw new UrlException(
                        "invalid punycode: input ended in the middle of an encoded number");
            }
            // Note that the input is all ASCII, so we can simply index the input string bytewise.
            auto c = input[pos];
            pos++;
            // let digit = the code point's digit-value, fail if it has none
            // basicToDigit reports "no digit value" by returning base.
            auto digit = basicToDigit(c);
            if (digit >= base) {
                throw new UrlException(
                        format("invalid punycode: character at position %s is not a valid digit",
                            pos - 1));
            }
            // let i = i + digit * w, fail on overflow
            if (digit > (maxInt - i) / w) {
                throw new UrlException("overflow during punydecode");
            }
            i += cast(size_t)(digit * w);
            // let t = tmin if k <= bias {+ tmin}, or
            //         tmax if k >= bias + tmax, or k - bias otherwise
            ulong t;
            if (k <= bias) {
                t = tmin;
            } else if (k >= bias + tmax) {
                t = tmax;
            } else {
                t = k - bias;
            }
            // if digit < t then break
            if (digit < t) {
                break;
            }
            // let w = w * (base - t), fail on overflow
            if (w > maxInt / (base - t)) {
                throw new UrlException("overflow during punydecode");
            }
            w *= (base - t);
            // end
        }
        immutable ulong outLen = output.length + 1;
        // let bias = adapt(i - oldi, length(output) + 1, test oldi is 0?)
        bias = adapt(i - oldi, outLen, oldi == 0);
        // let n = n + i div (length(output) + 1), fail on overflow
        // The result must also be a code point that can be stored in the output string.
        immutable ulong next = cast(ulong) n + i / outLen;
        if (next > dchar.max || (next >= 0xD800 && next <= 0xDFFF)) {
            throw new UrlException(
                    "invalid punycode: decoded value is not a valid Unicode code point");
        }
        n = cast(dchar) next;
        // let i = i mod (length(output) + 1)
        i = cast(size_t)(i % outLen);
        // {if n is a basic code point then fail}
        // (We aren't actually going to fail here; it's clear what this means.)
        // insert n into output at position i
        import std.array : insertInPlace;
        (() @trusted { output.insertInPlace(i, n); })();  // should be @safe but isn't marked
        // increment i
        i++;
        // end
    }
    return output.to!string;
}

// Lifted from punycode.js.
// Digits 0..25 map to 'a'..'z' (digit + 97); digits 26..35 map to '0'..'9' (digit + 22).
private dchar digitToBasic(ulong digit) {
    return cast(dchar)(digit + 22 + 75 * (digit < 26));
}

// Lifted from punycode.js.
// Digit value of a Punycode character: '0'..'9' give 26..35, letters of either case give 0..25.
// Any other character gives `base`, which is not a valid digit value.
private uint basicToDigit(char c) {
    if (c >= '0' && c <= '9') {
        return cast(uint)(c - '0') + 26;
    }
    if (c >= 'A' && c <= 'Z') {
        return cast(uint)(c - 'A');
    }
    if (c >= 'a' && c <= 'z') {
        return cast(uint)(c - 'a');
    }
    return base;
}

unittest {
    import std.stdio;

    // One and two non-ASCII characters in an otherwise ASCII segment.
    assert(punyEncode("b\u00FCcher") == "xn--bcher-kva");
    assert(punyEncode("b\u00FCc\u00FCher") == "xn--bcher-kvab");

    // Two different non-ASCII characters.
    auto twoAccents = punyEncode("ýbücher");
    assert(twoAccents == "xn--bcher-kvaf", twoAccents);

    assert(punyEncode("mañana") == "xn--maana-pta");

    // A segment with no ASCII characters at all.
    auto arabic = punyEncode("\u0644\u064A\u0647\u0645\u0627\u0628\u062A\u0643\u0644"
            ~ "\u0645\u0648\u0634\u0639\u0631\u0628\u064A\u061F");
    assert(arabic == "xn--egbpdaj6bu4bxfgehfvwxn", arabic);
}

unittest {
    // Decoding the encoder's test vectors gives back the original text.
    auto arabic = punyDecode("xn--egbpdaj6bu4bxfgehfvwxn");
    assert(arabic == "ليهمابتكلموشعربي؟", arabic);

    assert(punyDecode("xn--maana-pta") == "mañana");
}

unittest {
    import std.string, std.algorithm, std.array, std.range;

    // Whole domain names are handled one segment at a time, in both directions.
    auto asciiDomain = "xn--m3h.xn--n3h.com";
    auto decodedDomain = asciiDomain.splitter(".").map!(punyDecode).join(".");
    assert(decodedDomain == "☂.☃.com", decodedDomain);

    auto unicodeDomain = "☂.☃.com";
    auto encodedDomain = unicodeDomain.splitter(".").map!(punyEncode).join(".");
    assert(encodedDomain == "xn--m3h.xn--n3h.com", encodedDomain);
}