archttp.Url source code

1 /*
2  * Archttp - A highly performant web framework written in D.
3  *
4  * Copyright (C) 2021-2022 Kerisy.com
5  *
6  * Website: https://www.kerisy.com
7  *
8  * Licensed under the Apache-2.0 License.
9  *
10  */
11 
12 module archttp.Url;
13 
// Thanks to dhasenan; copied from https://github.com/dhasenan/urld
15 
16 import std.conv;
17 import std.string;
18 
19 pure:
20 @safe:
21 
/**
 * An exception thrown when something bad happens with Urls.
 *
 * The file and line of the throw site are forwarded to `Exception`, so error reports point at
 * the code that detected the problem rather than at this constructor.
 */
class UrlException : Exception
{
    /**
     * Params:
     *   msg  = description of what went wrong
     *   file = source file of the throw site (defaults to the caller's file)
     *   line = source line of the throw site (defaults to the caller's line)
     */
    this(string msg, string file = __FILE__, size_t line = __LINE__) pure nothrow @safe
    {
        super(msg, file, line);
    }
}
27 
/**
 * Best-guess default ports, keyed by Url scheme.
 *
 * The table is not exhaustive. Some schemes do not use ports at all, and some have no single
 * canonical port, so every entry should be treated as a hint rather than a guarantee.
 */
enum ushort[string] schemeToDefaultPort = [
    // a - d
    "aaa": 3868, "aaas": 5658, "acap": 674, "amqp": 5672,
    "cap": 1026, "coap": 5683, "coaps": 5684,
    "dav": 443, "dict": 2628,

    // f - h
    "ftp": 21, "git": 9418, "go": 1096, "gopher": 70,
    "http": 80, "https": 443,

    // i - j
    "iac": 4569, "icap": 1344, "imap": 143,
    "ipp": 631, "ipps": 631,  // yes, they're both mapped to port 631
    "irc": 6667,  // De facto default port, not the IANA reserved port.
    "ircs": 6697,
    "iris": 702,  // defaults to iris.beep
    "iris.beep": 702, "iris.lwz": 715, "iris.xpc": 713, "iris.xpcs": 714,
    "jabber": 5222,  // client-to-server

    // l - p
    "ldap": 389, "ldaps": 636,
    "msrp": 2855, "msrps": 2855, "mtqp": 1038, "mupdate": 3905,
    "news": 119, "nfs": 2049, "pop": 110,

    // r - s
    "redis": 6379, "reload": 6084, "rsync": 873, "rtmfp": 1935, "rtsp": 554,
    "shttp": 80, "sieve": 4190, "sip": 5060, "sips": 5061, "smb": 445, "smtp": 25,
    "snews": 563, "snmp": 161, "soap.beep": 605, "ssh": 22, "stun": 3478, "stuns": 5349,
    "svn": 3690,

    // t - w
    "teamspeak": 9987, "telnet": 23, "tftp": 69, "tip": 3372,
    "ws": 80, "wss": 443,
];
97 
/**
 * A collection of query parameters.
 *
 * This is effectively a multimap of string -> strings, stored as an array of key/value pairs
 * in insertion order. The same key may appear more than once.
 */
struct QueryParams
{
    /// Hash of the parameter list. Declared ahead of the `pure:` label below.
    hash_t toHash() const nothrow @safe
    {
        return typeid(params).getHash(&params);
    }

pure:
    import std.typecons;

    /// A single key/value pair.
    alias Param = Tuple!(string, "key", string, "value");

    /// The stored pairs, in the order they were added.
    Param[] params;

    /// The number of key/value pairs (not the number of distinct keys).
    @property size_t length() const {
        return params.length;
    }

    /**
     * Get a range over the query parameter values for the given key.
     *
     * Only values whose key equals `key` are produced, in insertion order. The range is empty
     * when the key is absent.
     */
    auto opIndex(string key) const
    {
        import std.algorithm.iteration : filter, map;
        // `filter`, not `find`: `find` would also yield every pair that follows the first
        // match, whatever its key.
        return params.filter!(x => x.key == key).map!(x => x.value);
    }

    /// Add a query parameter with the given key and value.
    /// If one already exists, there will now be two query parameters with the given name.
    void add(string key, string value) {
        params ~= Param(key, value);
    }

    /// Add a query parameter with the given key and value.
    /// If there are any existing parameters with the same key, they are removed and overwritten.
    /// The relative order of the remaining parameters is preserved.
    void overwrite(string key, string value) {
        // Build a fresh array instead of swap-removing in place: the in-place version skipped
        // the element it had just swapped in, so a duplicate key at the tail could survive.
        Param[] kept;
        kept.reserve(params.length + 1);
        foreach (ref existing; params) {
            if (existing.key != key) {
                kept ~= existing;
            }
        }
        kept ~= Param(key, value);
        params = kept;
    }

    /// Input range over a snapshot of the parameter slice; backs `range` and `alias this`.
    private struct QueryParamRange
    {
pure:
        size_t i;
        const(Param)[] params;
        bool empty() { return i >= params.length; }
        void popFront() { i++; }
        Param front() { return params[i]; }
    }

    /**
     * A range over the query parameters.
     *
     * Usage:
     * ---
     * foreach (key, value; url.queryParams) {}
     * ---
     */
    auto range() const
    {
        return QueryParamRange(0, this.params);
    }
    /// ditto
    alias range this;

    /**
     * Convert this set of query parameters into a query string.
     *
     * Pairs are joined with '&'. Keys and values are percent-encoded, and the '=' is omitted
     * for a pair whose value is empty. No leading '?' is produced.
     */
    string toString() const {
        import std.array : Appender;
        Appender!string s;
        bool first = true;
        foreach (param; params) {
            if (!first) {
                s ~= '&';
            }
            first = false;
            s ~= param.key.percentEncode;
            if (param.value.length > 0) {
                s ~= '=';
                s ~= param.value.percentEncode;
            }
        }
        return s.data;
    }

    /// Clone this set of query parameters; the copy owns its own parameter array.
    QueryParams dup() const
    {
        QueryParams other;
        other.params = params.dup;
        return other;
    }

    /**
     * Order two parameter sets.
     *
     * Pairs are compared position by position, key first and then value. If one set is a
     * prefix of the other, the shorter set sorts first.
     */
    int opCmp(const ref QueryParams other) const
    {
        immutable common = params.length < other.params.length
            ? params.length
            : other.params.length;
        foreach (i; 0 .. common)
        {
            auto c = cmp(params[i].key, other.params[i].key);
            if (c != 0) return c;
            c = cmp(params[i].value, other.params[i].value);
            if (c != 0) return c;
        }
        if (params.length > other.params.length) return 1;
        if (params.length < other.params.length) return -1;
        return 0;
    }
}
211 
212 /**
    * A Uniform Resource Locator.
214     *
215     * Urls can be parsed (see parseUrl) and implicitly convert to strings.
216     */
217 struct Url
218 {
219     private
220     {
221         bool _isValid = false;
222     }
223 
224     hash_t toHash() const @safe nothrow
225     {
226         return asTuple().toHash();
227     }
228 
229     this(string url)
230     {
231         if (this.parse(url))
232         {
233             _isValid = true;
234         }
235         else
236         {
237             throw new UrlException("failed to parse Url " ~ url);
238         }
239     }
240 
241     bool isValid()
242     {
243         return _isValid;
244     }
245 
246     /**
247     * Parse a Url from a string.
248     *
249     * This attempts to parse a wide range of Urls as people might actually type them. Some mistakes
250     * may be made. However, any Url in a correct format will be parsed correctly.
251     */
252     private bool parse(string value)
253     {
254         // scheme:[//[user:password@]host[:port]][/]path[?query][#fragment]
255         // Scheme is optional in common use. We infer 'http' if it's not given.
256         auto i = value.indexOf("//");
257         if (i > -1) {
258             if (i > 1) {
259                 this.scheme = value[0..i-1];
260             }
261             value = value[i+2 .. $];
262         } else {
263             this.scheme = "http";
264         }
265     // Check for an ipv6 hostname.
266         // [user:password@]host[:port]][/]path[?query][#fragment
267         i = value.indexOfAny([':', '/', '[']);
268         if (i == -1) {
269             // Just a hostname.
270             this.host = value.fromPuny;
271             return true;
272         }
273 
274         if (value[i] == ':') {
275             // This could be between username and password, or it could be between host and port.
276             auto j = value.indexOfAny(['@', '/']);
277             if (j > -1 && value[j] == '@') {
278                 try {
279                     this.user = value[0..i].percentDecode;
280                     this.pass = value[i+1 .. j].percentDecode;
281                 } catch (UrlException) {
282                     return false;
283                 }
284                 value = value[j+1 .. $];
285             }
286         }
287 
288         // It's trying to be a host/port, not a user/pass.
289         i = value.indexOfAny([':', '/', '[']);
290         if (i == -1) {
291             this.host = value.fromPuny;
292             return true;
293         }
294 
295         // Find the hostname. It's either an ipv6 address (which has special rules) or not (which doesn't
296         // have special rules). -- The main sticking point is that ipv6 addresses have colons, which we
297         // handle specially, and are offset with square brackets.
298         if (value[i] == '[') {
299             auto j = value[i..$].indexOf(']');
300             if (j < 0) {
301                 // unterminated ipv6 addr
302                 return false;
303             }
304             // includes square brackets
305             this.host = value[i .. i+j+1];
306             value = value[i+j+1 .. $];
307             if (value.length == 0) {
308                 // read to end of string; we finished parse
309                 return true;
310             }
311             if (value[0] != ':' && value[0] != '?' && value[0] != '/') {
312                 return false;
313             }
314         } else {
315             // Normal host.
316             this.host = value[0..i].fromPuny;
317             value = value[i .. $];
318         }
319 
320         if (value[0] == ':') {
321             auto end = value.indexOf('/');
322             if (end == -1) {
323                 end = value.length;
324             }
325             try {
326                 this.port = value[1 .. end].to!ushort;
327             } catch (ConvException) {
328                 return false;
329             }
330             value = value[end .. $];
331             if (value.length == 0) {
332                 return true;
333             }
334         }
335 
336         return parsePathAndQuery(value);
337     }
338 
339     private bool parsePathAndQuery(string value)
340     {
341         auto i = value.indexOfAny("?#");
342         if (i == -1)
343         {
344             this.path = value.percentDecode;
345             return true;
346         }
347 
348         try
349         {
350             this.path = value[0..i].percentDecode;
351         }
352         catch (UrlException)
353         {
354             return false;
355         }
356 
357         auto c = value[i];
358         value = value[i + 1 .. $];
359         if (c == '?')
360         {
361             i = value.indexOf('#');
362             string query;
363             if (i < 0)
364             {
365                 query = value;
366                 value = null;
367             }
368             else
369             {
370                 query = value[0..i];
371                 value = value[i + 1 .. $];
372             }
373             auto queries = query.split('&');
374             foreach (q; queries)
375             {
376                 auto j = q.indexOf('=');
377                 string key, val;
378                 if (j < 0)
379                 {
380                     key = q;
381                 }
382                 else
383                 {
384                     key = q[0..j];
385                     val = q[j + 1 .. $];
386                 }
387                 try
388                 {
389                     key = key.percentDecode;
390                     val = val.percentDecode;
391                 }
392                 catch (UrlException)
393                 {
394                     return false;
395                 }
396                 this.queryParams.add(key, val);
397             }
398         }
399 
400         try
401         {
402             this.fragment = value.percentDecode;
403         }
404         catch (UrlException)
405         {
406             return false;
407         }
408 
409         return true;
410     }
411 
// The `pure` attribute applies to the declarations that follow in this struct.
pure:
    /// The Url scheme. For instance, ssh, ftp, or https.
    string scheme;

    /// The username in this Url. Usually absent. When it is non-null, the string form of the
    /// Url contains "user:pass@" ahead of the host.
    string user;

    /// The password in this Url. Usually absent.
    string pass;

    /// The hostname.
    string host;
424 
425     /**
426       * The port.
427         *
428       * This is inferred from the scheme if it isn't present in the Url itself.
429       * If the scheme is not known and the port is not present, the port will be given as 0.
430       * For some schemes, port will not be sensible -- for instance, file or chrome-extension.
431       *
432       * If you explicitly need to detect whether the user provided a port, check the providedPort
433       * field.
434       */
435     @property ushort port() const nothrow
436     {
437         if (providedPort != 0) {
438             return providedPort;
439         }
440         if (auto p = scheme in schemeToDefaultPort) {
441             return *p;
442         }
443         return 0;
444     }
445 
446     /**
447       * Set the port.
448         *
449         * This sets the providedPort field and is provided for convenience.
450         */
451     @property ushort port(ushort value) nothrow
452     {
453         return providedPort = value;
454     }
455 
    /// The port that was explicitly provided in the Url. Zero means no port was provided.
    ushort providedPort;

    /**
     * The path.
     *
     * For instance, in the Url https://cnn.com/news/story/17774?visited=false, the path is
     * "/news/story/17774".
     */
    string path;

    /**
     * The query parameters associated with this Url.
     */
    QueryParams queryParams;

    /**
     * The fragment. In web documents, this typically refers to an anchor element.
     * For instance, in the Url https://cnn.com/news/story/17774#header2, the fragment is "header2".
     */
    string fragment;
477 
478     /**
479       * Convert this Url to a string.
480       * The string is properly formatted and usable for, eg, a web request.
481       */
482     string toString() const
483     {
484         return toString(false);
485     }
486 
487     /**
488         * Convert this Url to a string.
489         *
490         * The string is intended to be human-readable rather than machine-readable.
491         */
492     string toHumanReadableString() const
493     {
494         return toString(true);
495     }
496 
497     ///
498     unittest
499     {
500         auto url = "https://xn--m3h.xn--n3h.org/?hi=bye".parseUrl;
501         assert(url.toString == "https://xn--m3h.xn--n3h.org/?hi=bye", url.toString);
502         assert(url.toHumanReadableString == "https://☂.☃.org/?hi=bye", url.toString);
503     }
504 
    // A plain ASCII Url reads the same in human-readable form.
    unittest
    {
        assert("http://example.org/some_path".parseUrl.toHumanReadableString ==
                "http://example.org/some_path");
    }
510 
511     /**
512       * Convert the path and query string of this Url to a string.
513       */
514     string toPathAndQueryString() const
515     {
516         if (queryParams.length > 0)
517         {
518             return path ~ '?' ~ queryParams.toString;
519         }
520         return path;
521     }
522 
    ///
    unittest
    {
        // Scheme and host are dropped; path and query are kept.
        auto u = "http://example.org/index?page=12".parseUrl;
        auto pathAndQuery = u.toPathAndQueryString();
        assert(pathAndQuery == "/index?page=12", pathAndQuery);
    }
530 
531     private string toString(bool humanReadable) const
532     {
533         import std.array : Appender;
534         Appender!string s;
535         s ~= scheme;
536         s ~= "://";
537         if (user) {
538             s ~= humanReadable ? user : user.percentEncode;
539             s ~= ":";
540             s ~= humanReadable ? pass : pass.percentEncode;
541             s ~= "@";
542         }
543         s ~= humanReadable ? host : host.toPuny;
544         if (providedPort) {
545             if ((scheme in schemeToDefaultPort) == null || schemeToDefaultPort[scheme] != providedPort) {
546                 s ~= ":";
547                 s ~= providedPort.to!string;
548             }
549         }
550         string p = path;
551         if (p.length == 0 || p == "/") {
552             s ~= '/';
553         } else {
554             if (humanReadable) {
555                 s ~= p;
556             } else {
557                 if (p[0] == '/') {
558                     p = p[1..$];
559                 }
560                 foreach (part; p.split('/')) {
561                     s ~= '/';
562                     s ~= part.percentEncode;
563                 }
564             }
565         }
566         if (queryParams.length) {
567             s ~= '?';
568             s ~= queryParams.toString;
569         }        if (fragment) {
570             s ~= '#';
571             s ~= fragment.percentEncode;
572         }
573         return s.data;
574     }
575 
576     /// Implicitly convert Urls to strings.
577     alias toString this;
578 
579     /**
580       Compare two Urls.
581 
582       I tried to make the comparison produce a sort order that seems natural, so it's not identical
583       to sorting based on .toString(). For instance, username/password have lower priority than
584       host. The scheme has higher priority than port but lower than host.
585 
586       While the output of this is guaranteed to provide a total ordering, and I've attempted to make
587       it human-friendly, it isn't guaranteed to be consistent between versions. The implementation
588       and its results can change without a minor version increase.
589     */
590     int opCmp(const Url other) const
591     {
592         return asTuple.opCmp(other.asTuple);
593     }
594 
595     private auto asTuple() const nothrow
596     {
597         import std.typecons : tuple;
598         return tuple(host, scheme, port, user, pass, path, queryParams);
599     }
600 
601     /// Equality checks.
602     // bool opEquals(string other) const
603     // {
604     //     Url o = parseUrl(other);
605     //     if (!parseUrl(other))
606     //     {
607     //         return false;
608     //     }
609 
610     //     return asTuple() == o.asTuple();
611     // }
612 
613     /// Ditto
614     bool opEquals(ref const Url other) const
615     {
616         return asTuple() == other.asTuple();
617     }
618 
619     /// Ditto
620     bool opEquals(const Url other) const
621     {
622         return asTuple() == other.asTuple();
623     }
624 
    // Sorting Urls: host is compared first, then scheme, then port, then the remaining fields.
    unittest
    {
        import std.algorithm, std.array, std.format;
        assert("http://example.org/some_path".parseUrl > "http://example.org/other_path".parseUrl);
        alias sorted = std.algorithm.sort;
        auto parsedUrls =
        [
            "http://example.org/some_path",
            "http://example.org:81/other_path",
            "http://example.org/other_path",
            "https://example.org/first_path",
            "http://example.xyz/other_other_path",
            "http://me:secret@blog.ikeran.org/wp_admin",
        ].map!(x => x.parseUrl).array;
        auto urls = sorted(parsedUrls).map!(x => x.toHumanReadableString).array;
        // Expected order after sorting.
        auto expected =
        [
            "http://me:secret@blog.ikeran.org/wp_admin",
            "http://example.org/other_path",
            "http://example.org/some_path",
            "http://example.org:81/other_path",
            "https://example.org/first_path",
            "http://example.xyz/other_other_path",
        ];
        assert(cmp(urls, expected) == 0, "expected:\n%s\ngot:\n%s".format(expected, urls));
    }
651 
    // Urls that differ only in query parameters are ordered by key, then by value.
    unittest
    {
        auto a = "http://x.org/a?b=c".parseUrl;
        auto b = "http://x.org/a?d=e".parseUrl;
        auto c = "http://x.org/a?b=a".parseUrl;
        assert(a < b);
        assert(c < b);
        assert(c < a);
    }
661 
662     /**
663         * The append operator (~).
664         *
665         * The append operator for Urls returns a new Url with the given string appended as a path
666         * element to the Url's path. It only adds new path elements (or sequences of path elements).
667         *
668         * Don't worry about path separators; whether you include them or not, it will just work.
669         *
670         * Query elements are copied.
671         *
672         * Examples:
673         * ---
674         * auto random = "http://testdata.org/random".parseUrl;
675         * auto randInt = random ~ "int";
676         * writeln(randInt);  // prints "http://testdata.org/random/int"
677         * ---
678         */
679     Url opBinary(string op : "~")(string subsequentPath) {
680         Url other = this;
681         other ~= subsequentPath;
682         other.queryParams = queryParams.dup;
683         return other;
684     }
685 
686     /**
687         * The append-in-place operator (~=).
688         *
689         * The append operator for Urls adds a path element to this Url. It only adds new path elements
690         * (or sequences of path elements).
691         *
692         * Don't worry about path separators; whether you include them or not, it will just work.
693         *
694         * Examples:
695         * ---
696         * auto random = "http://testdata.org/random".parseUrl;
697         * random ~= "int";
698         * writeln(random);  // prints "http://testdata.org/random/int"
699         * ---
700         */
701     Url opOpAssign(string op : "~")(string subsequentPath) {
702         if (path.endsWith("/")) {
703             if (subsequentPath.startsWith("/")) {
704                 path ~= subsequentPath[1..$];
705             } else {
706                 path ~= subsequentPath;
707             }
708         } else {
709             if (!subsequentPath.startsWith("/")) {
710                 path ~= '/';
711             }
712             path ~= subsequentPath;
713         }
714         return this;
715     }
716 
717     /**
718         * Convert a relative Url to an absolute Url.
719         *
720         * This is designed so that you can scrape a webpage and quickly convert links within the
721         * page to Urls you can actually work with, but you're clever; I'm sure you'll find more uses
722         * for it.
723         *
724         * It's biased toward HTTP family Urls; as one quirk, "//" is interpreted as "same scheme,
725         * different everything else", which might not be desirable for all schemes.
726         *
727         * This only handles Urls, not URIs; if you pass in 'mailto:bob.dobbs@subgenius.org', for
728         * instance, this will give you our best attempt to parse it as a Url.
729         *
730         * Examples:
731         * ---
732         * auto base = "https://example.org/passworddb?secure=false".parseUrl;
733         *
734         * // Download https://example.org/passworddb/by-username/dhasenan
735         * download(base.resolve("by-username/dhasenan"));
736         *
737         * // Download https://example.org/static/style.css
738         * download(base.resolve("/static/style.css"));
739         *
740         * // Download https://cdn.example.net/jquery.js
741         * download(base.resolve("https://cdn.example.net/jquery.js"));
742         * ---
743         */
744     // Url resolve(string other)
745     // {
746     //     if (other.length == 0) return this;
747     //     if (other[0] == '/')
748     //     {
749     //         if (other.length > 1 && other[1] == '/')
750     //         {
751     //             // Uncommon syntax: a link like "//wikimedia.org" means "same scheme, switch Url"
752     //             return parseUrl(this.scheme ~ ':' ~ other);
753     //         }
754     //     }
755     //     else
756     //     {
757     //         auto schemeSep = other.indexOf("://");
758     //         if (schemeSep >= 0 && schemeSep < other.indexOf("/"))
759     //         // separate Url
760     //         {
761     //             return other.parseUrl;
762     //         }
763     //     }
764 
765     //     Url ret = this;
766     //     ret.path = "";
767     //     ret.queryParams = ret.queryParams.init;
768     //     if (other[0] != '/')
769     //     {
770     //         // relative to something
771     //         if (!this.path.length)
772     //         {
773     //             // nothing to be relative to
774     //             other = "/" ~ other;
775     //         }
776     //         else if (this.path[$-1] == '/')
777     //         {
778     //             // directory-style path for the current thing
779     //             // resolve relative to this directory
780     //             other = this.path ~ other;
781     //         }
782     //         else
783     //         {
784     //             // this is a file-like thing
785     //             // find the 'directory' and relative to that
786     //             other = this.path[0..this.path.lastIndexOf('/') + 1] ~ other;
787     //         }
788     //     }
789     //     // collapse /foo/../ to /
790     //     if (other.indexOf("/../") >= 0)
791     //     {
792     //         import std.array : Appender, array;
793     //         import std.string : split;
794     //         import std.algorithm.iteration : joiner, filter;
795     //         string[] parts = other.split('/');
796     //         for (int i = 0; i < parts.length; i++)
797     //         {
798     //             if (parts[i] == "..")
799     //             {
800     //                 for (int j = i - 1; j >= 0; j--)
801     //                 {
802     //                     if (parts[j] != null)
803     //                     {
804     //                         parts[j] = null;
805     //                         parts[i] = null;
806     //                         break;
807     //                     }
808     //                 }
809     //             }
810     //         }
811     //         other = "/" ~ parts.filter!(x => x != null).joiner("/").to!string;
812     //     }
813     //     parsePathAndQuery(ret, other);
814     //     return ret;
815     // }
816 
    // NOTE(review): the `resolve` method is commented out in this struct, so the four tests
    // below only compile if a `resolve` is reachable some other way (for example a free function
    // picked up through UFCS). Confirm; otherwise building with -unittest fails here.

    // Relative link resolved against the directory of a file-like path.
    unittest
    {
        auto a = "http://alcyius.com/dndtools/index.html".parseUrl;
        auto b = a.resolve("contacts/index.html");
        assert(b.toString == "http://alcyius.com/dndtools/contacts/index.html");
    }

    // The query of the base Url is replaced by the query of the relative link.
    unittest
    {
        auto a = "http://alcyius.com/dndtools/index.html?a=b".parseUrl;
        auto b = a.resolve("contacts/index.html?foo=bar");
        assert(b.toString == "http://alcyius.com/dndtools/contacts/index.html?foo=bar");
    }

    // ".." climbs one directory from the base path.
    unittest
    {
        auto a = "http://alcyius.com/dndtools/index.html".parseUrl;
        auto b = a.resolve("../index.html");
        assert(b.toString == "http://alcyius.com/index.html", b.toString);
    }

    // ".." removes only the innermost directory of a deeper base path.
    unittest
    {
        auto a = "http://alcyius.com/dndtools/foo/bar/index.html".parseUrl;
        auto b = a.resolve("../index.html");
        assert(b.toString == "http://alcyius.com/dndtools/foo/index.html", b.toString);
    }
844 }
845 
// Build Urls field by field and check their string form.
unittest {
    {
        // Basic.
        Url url;
        with (url) {
            scheme = "https";
            host = "example.org";
            path = "/foo/bar";
            queryParams.add("hello", "world");
            queryParams.add("gibe", "clay");
            fragment = "frag";
        }
        assert(
                // QueryParams.add appends to an array, so the first form (insertion order) is
                // the one expected; the second ordering is tolerated as well.
                url.toString == "https://example.org/foo/bar?hello=world&gibe=clay#frag" ||
                url.toString == "https://example.org/foo/bar?gibe=clay&hello=world#frag",
                url.toString);
    }
    {
        // Percent encoded.
        Url url;
        with (url) {
            scheme = "https";
            host = "example.org";
            path = "/f☃o";
            queryParams.add("❄", "❀");
            queryParams.add("[", "]");
            fragment = "ş";
        }
        assert(
                // Either ordering of the two query parameters is accepted.
                url.toString == "https://example.org/f%E2%98%83o?%E2%9D%84=%E2%9D%80&%5B=%5D#%C5%9F" ||
                url.toString == "https://example.org/f%E2%98%83o?%5B=%5D&%E2%9D%84=%E2%9D%80#%C5%9F",
                url.toString);
    }
    {
        // Port, user, pass.
        Url url;
        with (url) {
            scheme = "https";
            host = "example.org";
            user = "dhasenan";
            pass = "itsasecret";
            port = 17;
        }
        assert(
                url.toString == "https://dhasenan:itsasecret@example.org:17/",
                url.toString);
    }
    {
        // Query with no path.
        Url url;
        with (url) {
            scheme = "https";
            host = "example.org";
            queryParams.add("hi", "bye");
        }
        assert(
                url.toString == "https://example.org/?hi=bye",
                url.toString);
    }
}
908 
// A Url that starts with "//" has no scheme text; host and path are still picked out.
unittest
{
    auto url = "//foo/bar".parseUrl;
    assert(url.host == "foo", "expected host foo, got " ~ url.host);
    assert(url.path == "/bar");
}
915 
// A file Url has an empty host, no port, and round-trips through toString.
unittest
{
    auto url = "file:///foo/bar".parseUrl;
    assert(url.host == null);
    assert(url.port == 0);
    assert(url.scheme == "file");
    assert(url.path == "/foo/bar");
    assert(url.toString == "file:///foo/bar");
    assert(url.queryParams.empty);
    assert(url.fragment == null);
}
928 
// IPv6 literals: the host keeps its square brackets, and the colons inside the brackets are not
// taken for a port separator.
unittest
{
    // ipv6 hostnames!
    {
        // full range of data
        auto url = parseUrl("https://bob:secret@[::1]:2771/foo/bar");
        assert(url.scheme == "https", url.scheme);
        assert(url.user == "bob", url.user);
        assert(url.pass == "secret", url.pass);
        assert(url.host == "[::1]", url.host);
        assert(url.port == 2771, url.port.to!string);
        assert(url.path == "/foo/bar", url.path);
    }

    // minimal
    {
        auto url = parseUrl("[::1]");
        assert(url.host == "[::1]", url.host);
    }

    // some random bits
    {
        auto url = parseUrl("http://[::1]/foo");
        assert(url.scheme == "http", url.scheme);
        assert(url.host == "[::1]", url.host);
        assert(url.path == "/foo", url.path);
    }

    // full-length address followed by a query and a fragment
    {
        auto url = parseUrl("https://[2001:0db8:0:0:0:0:1428:57ab]/?login=true#justkidding");
        assert(url.scheme == "https");
        assert(url.host == "[2001:0db8:0:0:0:0:1428:57ab]");
        assert(url.path == "/");
        assert(url.fragment == "justkidding");
    }
}
965 
// Appending path elements with ~ to a Url given without a scheme ("http" is inferred).
unittest
{
    auto url = "localhost:5984".parseUrl;
    auto url2 = url ~ "db1";
    assert(url2.toString == "http://localhost:5984/db1", url2.toString);
    auto url3 = url2 ~ "_all_docs";
    assert(url3.toString == "http://localhost:5984/db1/_all_docs", url3.toString);
}
974 
///
unittest {
    // Basic: scheme, host, path, two query parameters and a fragment.
    {
        Url built;
        built.scheme = "https";
        built.host = "example.org";
        built.path = "/foo/bar";
        built.queryParams.add("hello", "world");
        built.queryParams.add("gibe", "clay");
        built.fragment = "frag";

        // The order of the query parameters is not pinned down; accept either.
        auto rendered = built.toString;
        assert(
                rendered == "https://example.org/foo/bar?hello=world&gibe=clay#frag" ||
                rendered == "https://example.org/foo/bar?gibe=clay&hello=world#frag",
                rendered);
    }

    // The same query key added twice.
    {
        Url built;
        built.scheme = "https";
        built.host = "example.org";
        built.path = "/foo/bar";
        built.queryParams.add("hello", "world");
        built.queryParams.add("hello", "aether");
        built.fragment = "frag";

        // The order of the query parameters is not pinned down; accept either.
        auto rendered = built.toString;
        assert(
                rendered == "https://example.org/foo/bar?hello=world&hello=aether#frag" ||
                rendered == "https://example.org/foo/bar?hello=aether&hello=world#frag",
                rendered);
    }

    // Components that need percent encoding.
    {
        Url built;
        built.scheme = "https";
        built.host = "example.org";
        built.path = "/f☃o";
        built.queryParams.add("❄", "❀");
        built.queryParams.add("[", "]");
        built.fragment = "ş";

        // The order of the query parameters is not pinned down; accept either.
        auto rendered = built.toString;
        assert(
                rendered == "https://example.org/f%E2%98%83o?%E2%9D%84=%E2%9D%80&%5B=%5D#%C5%9F" ||
                rendered == "https://example.org/f%E2%98%83o?%5B=%5D&%E2%9D%84=%E2%9D%80#%C5%9F",
                rendered);
    }

    // Port, user, pass.
    {
        Url built;
        built.scheme = "https";
        built.host = "example.org";
        built.user = "dhasenan";
        built.pass = "itsasecret";
        built.port = 17;

        auto rendered = built.toString;
        assert(rendered == "https://dhasenan:itsasecret@example.org:17/", rendered);
    }

    // Query with no path.
    {
        Url built;
        built.scheme = "https";
        built.host = "example.org";
        built.queryParams.add("hi", "bye");

        auto rendered = built.toString;
        assert(rendered == "https://example.org/?hi=bye", rendered);
    }
}
1055 
unittest {
    // Percent decoding of each URL component, followed by a round trip.

    // User is "#", password is "!:" once decoded: http://#:!:@
    string encoded = "http://%23:%21%3A@example.org/%7B/%7D?%3B&%26=%3D#%23hash%EF%BF%BD";
    auto parsed = encoded.parseUrl;

    assert(parsed.user == "#");
    assert(parsed.pass == "!:");
    assert(parsed.host == "example.org");
    assert(parsed.path == "/{/}");
    assert(parsed.queryParams[";"].front == "");
    assert(parsed.queryParams["&"].front == "=");
    assert(parsed.fragment == "#hash�");

    // Round trip: parsing and re-serialising must reproduce the input, once and twice over.
    assert(encoded == encoded.parseUrl.toString, encoded.parseUrl.toString);
    assert(encoded == encoded.parseUrl.toString.parseUrl.toString);
}
1074 
unittest {
    // A Punycode host is exposed in its Unicode form through `host`.
    auto parsed = parseUrl("https://xn--m3h.xn--n3h.org/?hi=bye");
    assert(parsed.host == "☂.☃.org", parsed.host);
}
1079 
unittest {
    // A Unicode host is written back out in Punycode by `toString`.
    auto parsed = parseUrl("https://☂.☃.org/?hi=bye");
    assert(parsed.toString == "https://xn--m3h.xn--n3h.org/?hi=bye");
}
1084 
///
unittest {
    // Starting from a URL that already has a path.
    auto target = parseUrl("http://example.org/foo");
    // Segment without a leading slash: one is inserted.
    assert((target ~ "bar").toString == "http://example.org/foo/bar");
    // Segment with a leading slash: no second slash is added.
    auto joined = target ~ "/bar";
    assert(joined.toString == "http://example.org/foo/bar", joined.toString);
    target ~= "bar";
    assert(target.toString == "http://example.org/foo/bar");

    // Existing path ends with a slash: it is not doubled.
    target = parseUrl("http://example.org/foo/");
    assert((target ~ "bar").toString == "http://example.org/foo/bar");
    // ... not even when the appended segment starts with a slash as well.
    assert((target ~ "/bar").toString == "http://example.org/foo/bar");
    target ~= "/bar";
    assert(target.toString == "http://example.org/foo/bar");

    // No path at all.
    target = parseUrl("http://example.org");
    assert((target ~ "bar").toString == "http://example.org/bar");
    assert((target ~ "/bar").toString == "http://example.org/bar");
    target ~= "bar";
    assert(target.toString == "http://example.org/bar");

    // Path consisting of a single slash.
    target = parseUrl("http://example.org/");
    assert((target ~ "bar").toString == "http://example.org/bar");
    assert((target ~ "/bar").toString == "http://example.org/bar");
    target ~= "bar";
    assert(target.toString == "http://example.org/bar", target.toString);

    // No path, just fragment.
    target = parseUrl("ircs://irc.freenode.com/#d");
    assert(target.toString == "ircs://irc.freenode.com/#d", target.toString);
}
unittest
{
    // resolve() against a base whose path ends in a slash.
    {
        auto origin = parseUrl("https://example.org/this/");
        assert(origin.resolve("that") == "https://example.org/this/that");
        assert(origin.resolve("/that") == "https://example.org/that");
        assert(origin.resolve("//example.net/that") == "https://example.net/that");
    }

    // The base's query parameters must not leak into the resolved URL.
    {
        auto origin = parseUrl("https://example.org/this?query=value&other=value2");
        assert(origin.resolve("that") == "https://example.org/that");
        assert(origin.resolve("/that") == "https://example.org/that");
        assert(origin.resolve("tother/that") == "https://example.org/tother/that");
        assert(origin.resolve("//example.net/that") == "https://example.net/that");
    }
}
1143 
1144 
unittest
{
    // Checks only that passing a Url to std.net.curl.get type-checks;
    // the `is(typeof(...))` expression never performs a request.
    import std.net.curl;
    auto target = parseUrl("http://example.org");
    assert(is(typeof(std.net.curl.get(target))));
}
1151 
1152 /**
1153     * Parse the input string as a Url.
1154     *
1155     * Throws:
1156     *   UrlException if the string was in an incorrect format.
1157     */
1158 // Url parseUrl(string value) {
1159 //     return Url(value);
1160 // }
1161 
///
unittest {
    // Scheme is inferred when the input has none.
    {
        auto inferred = parseUrl("example.org");
        assert(inferred.scheme == "http");
        assert(inferred.host == "example.org");
        assert(inferred.path == "");
        assert(inferred.port == 80);
        assert(inferred.providedPort == 0);
        assert(inferred.fragment == "");
    }

    // Scheme and host only.
    {
        auto plain = parseUrl("https://example.org");
        assert(plain.scheme == "https");
        assert(plain.host == "example.org");
        assert(plain.path == "");
        assert(plain.port == 443);
        assert(plain.providedPort == 0);
    }

    // A path after the host.
    {
        auto withPath = parseUrl("https://example.org/foo/bar");
        assert(withPath.scheme == "https");
        assert(withPath.host == "example.org");
        assert(withPath.path == "/foo/bar", "expected /foo/bar but got " ~ withPath.path);
        assert(withPath.port == 443);
        assert(withPath.providedPort == 0);
    }

    // An explicit port shows up in both `port` and `providedPort`.
    {
        auto withPort = parseUrl("https://example.org:1021/foo/bar");
        assert(withPort.scheme == "https");
        assert(withPort.host == "example.org");
        assert(withPort.path == "/foo/bar", "expected /foo/bar but got " ~ withPort.path);
        assert(withPort.port == 1021);
        assert(withPort.providedPort == 1021);
    }

    // User and password.
    {
        auto withLogin = parseUrl("https://bob:secret@example.org/foo/bar");
        assert(withLogin.scheme == "https");
        assert(withLogin.host == "example.org");
        assert(withLogin.path == "/foo/bar");
        assert(withLogin.port == 443);
        assert(withLogin.user == "bob");
        assert(withLogin.pass == "secret");
    }

    // User and password, percent-encoded in the input.
    {
        auto encodedLogin = parseUrl("https://bob%21:secret%21%3F@example.org/foo/bar");
        assert(encodedLogin.scheme == "https");
        assert(encodedLogin.host == "example.org");
        assert(encodedLogin.path == "/foo/bar");
        assert(encodedLogin.port == 443);
        assert(encodedLogin.user == "bob!");
        assert(encodedLogin.pass == "secret!?");
    }

    // User, password, port and path together.
    {
        auto everything = parseUrl("https://bob:secret@example.org:2210/foo/bar");
        assert(everything.scheme == "https");
        assert(everything.host == "example.org");
        assert(everything.path == "/foo/bar");
        assert(everything.port == 2210);
        assert(everything.user == "bob");
        assert(everything.pass == "secret");
        assert(everything.fragment == "");
    }

    // Query string.
    {
        auto withQuery = parseUrl("https://example.org/?login=true");
        assert(withQuery.scheme == "https");
        assert(withQuery.host == "example.org");
        assert(withQuery.path == "/", "expected path: / actual path: " ~ withQuery.path);
        assert(withQuery.queryParams["login"].front == "true");
        assert(withQuery.fragment == "");
    }

    // Query string followed by a fragment.
    {
        auto withFragment = parseUrl("https://example.org/?login=true#justkidding");
        assert(withFragment.scheme == "https");
        assert(withFragment.host == "example.org");
        assert(withFragment.path == "/", "expected path: / actual path: " ~ withFragment.path);
        assert(withFragment.queryParams["login"].front == "true");
        assert(withFragment.fragment == "justkidding");
    }

    // Percent-encoded path, query and fragment are decoded.
    {
        auto decoded = parseUrl("https://example.org/%E2%98%83?%E2%9D%84=%3D#%5E");
        assert(decoded.scheme == "https");
        assert(decoded.host == "example.org");
        assert(decoded.path == "/☃", "expected path: /☃ actual path: " ~ decoded.path);
        assert(decoded.queryParams["❄"].front == "=");
        assert(decoded.fragment == "^");
    }
}
1260 
unittest {
    // Default port versus explicit port.
    assert(parseUrl("http://example.org").port == 80);
    assert(parseUrl("http://example.org:5326").port == 5326);

    // Credentials and scheme of a fully populated URL.
    auto redisUrl = "redis://admin:password@redisbox.local:2201/path?query=value#fragment".parseUrl;
    assert(redisUrl.scheme == "redis");
    assert(redisUrl.user == "admin");
    assert(redisUrl.pass == "password");

    // toString adds the inferred scheme and omits the scheme's default port.
    assert("example.org".parseUrl.toString == "http://example.org/");
    assert("http://example.org:80".parseUrl.toString == "http://example.org/");

    // A non-default port is kept.
    assert("localhost:8070".parseUrl.toString == "http://localhost:8070/");
}
1275 
/**
    * Percent-encode a string.
    *
    * ASCII letters, ASCII digits and the four characters `-._~` are copied through unchanged.
    * Every other code point is converted to UTF-8 and each resulting byte is written as `%XX`
    * with two uppercase hexadecimal digits.
    *
    * Params:
    *   raw = the text to encode
    *
    * Returns: the percent-encoded text.
    */
string percentEncode(string raw) {
    // We *must* encode these characters: :/?#[]@!$&'()*+,;="
    // We *can* encode any other characters.
    // We *should not* encode alpha, numeric, or -._~.
    import std.utf : encode;
    import std.array : Appender;

    Appender!string result;
    foreach (dchar point; raw) {
        switch (point) {
            case 'a': .. case 'z':
            case 'A': .. case 'Z':
            case '0': .. case '9':
            case '-', '.', '_', '~':
                // Safe to emit as-is.
                result ~= point;
                break;

            default:
                // A 7-bit ASCII character yields a single UTF-8 byte; anything else yields
                // several. Either way each byte is escaped on its own (and we hope the
                // server understands UTF-8).
                char[4] units;
                immutable count = encode(units, point);
                foreach (unit; units[0 .. count]) {
                    result ~= format("%%%02X", cast(ubyte) unit);
                }
                break;
        }
    }
    return result.data;
}
1312 
///
unittest {
    // Letters, digits and "-._~" come back unchanged.
    foreach (untouched; ["IDontNeedNoPercentEncoding", "~~--..__", "0123456789"]) {
        assert(percentEncode(untouched) == untouched);
    }

    // A non-ASCII character is escaped one UTF-8 byte at a time.
    string snowman = percentEncode("☃");
    assert(snowman == "%E2%98%83", "expected %E2%98%83 but got" ~ snowman);
}
1324 
/**
    * Percent-decode a string.
    *
    * The input is decoded with percentDecodeRaw and the resulting bytes are then checked to be
    * well-formed UTF-8 before being returned as a string.
    *
    * Params:
    *   encoded = percent-encoded text
    *
    * Returns: the decoded text, guaranteed to be valid UTF-8.
    *
    * Throws:
    *   UrlException if the decoded bytes are not valid UTF-8. Whatever percentDecodeRaw throws
    *   for malformed input propagates unchanged.
    */
string percentDecode(string encoded)
{
    import std.utf : validate, UTFException;

    // Reinterpret the raw bytes as text; validate() below rejects anything that is not UTF-8.
    auto decoded = cast(string) percentDecodeRaw(encoded);
    try
    {
        validate(decoded);
        return decoded;
    }
    catch (UTFException)
    {
        throw new UrlException(
                "The percent-encoded data `" ~ encoded ~ "` does not represent a valid UTF-8 sequence.");
    }
}
1350 
///
unittest {
    // Text without any escapes comes back unchanged.
    foreach (untouched; ["IDontNeedNoPercentDecoding", "~~--..__", "0123456789"]) {
        assert(percentDecode(untouched) == untouched);
    }

    // Hex digits are accepted in upper and in lower case.
    string snowman = percentDecode("%E2%98%83");
    assert(snowman == "☃", "expected a snowman but got" ~ snowman);

    snowman = percentDecode("%e2%98%83");
    assert(snowman == "☃", "expected a snowman but got" ~ snowman);

    // %ES is an invalid percent sequence: 'S' is not a hex digit.
    bool rejectedBadDigit = false;
    try {
        percentDecode("%es");
    } catch (UrlException) {
        rejectedBadDigit = true;
    }
    assert(rejectedBadDigit, "expected exception not thrown");

    // A percent sign followed by a single character is too short.
    bool rejectedShortEscape = false;
    try {
        percentDecode("%e");
    } catch (UrlException) {
        rejectedShortEscape = true;
    }
    assert(rejectedShortEscape, "expected exception not thrown");
}
1378 
/**
    * Percent-decode a string into a ubyte array.
    *
    * Every `%XX` escape (two hexadecimal digits, either case) is replaced by the byte it denotes;
    * all other input bytes are copied through unchanged.
    *
    * This yields a ubyte array and will not perform validation on the output. However, an improperly
    * formatted input string will result in a UrlException.
    *
    * Params:
    *   encoded = percent-encoded text
    *
    * Returns: the decoded bytes, which need not form valid UTF-8.
    *
    * Throws:
    *   UrlException if a percent sign is not followed by two characters, or if those two
    *   characters are not both hexadecimal digits.
    */
immutable(ubyte)[] percentDecodeRaw(string encoded)
{
    // We're dealing with possibly incorrectly encoded UTF-8. Mark it down as ubyte[] for now.
    import std.array : Appender;
    Appender!(immutable(ubyte)[]) app;
    for (size_t i = 0; i < encoded.length; i++) {
        if (encoded[i] != '%') {
            app ~= encoded[i];
            continue;
        }
        // Written as an addition on purpose: `encoded.length - 2` wraps around for inputs
        // shorter than two characters (e.g. "%"), which would let the guard pass and make the
        // indexing below run off the end of the string.
        if (i + 2 >= encoded.length) {
            throw new UrlException("Invalid percent encoded value: expected two characters after " ~
                    "percent symbol. Error at index " ~ i.to!string);
        }
        if (isHex(encoded[i + 1]) && isHex(encoded[i + 2])) {
            auto high = fromHex(encoded[i + 1]);
            auto low = fromHex(encoded[i + 2]);
            app ~= cast(ubyte)((high << 4) | low);
        } else {
            throw new UrlException("Invalid percent encoded value: expected two hex digits after " ~
                    "percent symbol. Error at index " ~ i.to!string);
        }
        // Skip the two hex digits; the loop increment steps past the percent sign itself.
        i += 2;
    }
    return app.data;
}
1415 
/// Returns true when `c` is an ASCII hexadecimal digit: 0-9, a-f or A-F.
private bool isHex(char c) {
    switch (c) {
        case '0': .. case '9':
        case 'a': .. case 'f':
        case 'A': .. case 'F':
            return true;
        default:
            return false;
    }
}
1421 
/**
    * Converts one hexadecimal digit, in either case, to its numeric value (0 to 15).
    *
    * Any character that is not a hexadecimal digit yields 255.
    */
private ubyte fromHex(char s) {
    switch (s) {
        case '0': .. case '9':
            return cast(ubyte)(s - '0');
        case 'A': .. case 'F':
            return cast(ubyte)(s - 'A' + 10);
        case 'a': .. case 'f':
            return cast(ubyte)(s - 'a' + 10);
        default:
            return ubyte.max;
    }
}
1429 
/**
    * Converts a hostname to its Punycode form.
    *
    * An empty hostname yields "". A hostname starting with '[' is taken to be an IPv6 literal and is
    * returned as-is. Otherwise the name is scanned up to its first character above U+0080: any
    * character met on the way that falls in one of the rejected ASCII ranges raises an exception.
    * If such a non-ASCII character is found, every dot-separated label is passed through
    * punyEncode; if not, the hostname is returned unchanged.
    *
    * Throws:
    *   UrlException if an illegal character is found before the first non-ASCII character.
    */
private string toPuny(string unicodeHostname)
{
    if (unicodeHostname.length == 0) return "";
    if (unicodeHostname[0] == '[')
    {
        // It's an ipv6 name.
        return unicodeHostname;
    }
    bool mustEncode = false;
    foreach (i, dchar d; unicodeHostname) {
        auto c = cast(uint) d;
        if (c > 0x80) {
            mustEncode = true;
            break;
        }
        // Rejected ranges:
        //   below 0x2C       controls, space and !"#$%&'()*+
        //   0x3A .. 0x40     : ; < = > ? @
        //   0x5B .. 0x60     [ \ ] ^ _ `
        //   0x7B and above   { | } ~ DEL (and U+0080, which the check above lets through)
        // The second range used to end at decimal 40, which made that test always false.
        if (c < 0x2C || (c >= 0x3A && c <= 0x40) || (c >= 0x5B && c <= 0x60) || (c >= 0x7B)) {
            throw new UrlException(
                    format(
                        "domain name '%s' contains illegal character '%s' at position %s",
                        unicodeHostname, d, i));
        }
    }
    if (!mustEncode) {
        return unicodeHostname;
    }
    import std.algorithm.iteration : map;
    return unicodeHostname.split('.').map!punyEncode.join(".");
}
1458 
/// Passes each dot-separated label of `hostname` through punyDecode and joins the results with dots.
private string fromPuny(string hostname)
{
    string[] labels = hostname.split('.');
    foreach (ref label; labels) {
        label = punyDecode(label);
    }
    return labels.join(".");
}
1464 
private {
    // Constants shared by the Punycode encoder and decoder.
    enum delimiter = '-';      // separates the copied basic code points from the encoded part
    enum marker = "xn--";      // prefix of a Punycode-encoded label
    enum ulong damp = 700;
    enum ulong tmin = 1;
    enum ulong tmax = 26;
    enum ulong skew = 38;
    enum ulong base = 36;
    enum ulong initialBias = 72;
    enum dchar initialN = cast(dchar)128;

    /**
        * Computes the next bias value from the delta that was just processed.
        *
        * Params:
        *   delta = the delta just encoded or decoded
        *   numPoints = divisor for the proportional increase; must not be zero
        *   firstTime = true selects division by `damp`, false selects division by 2
        */
    ulong adapt(ulong delta, ulong numPoints, bool firstTime) {
        enum ulong span = base - tmin;
        enum ulong threshold = (span * tmax) / 2;

        ulong scaled = firstTime ? delta / damp : delta / 2;
        scaled += scaled / numPoints;

        ulong offset = 0;
        for (; scaled > threshold; offset += base) {
            scaled /= span;
        }
        return offset + ((span + 1) * scaled) / (scaled + skew);
    }
}
1491 
/**
    * Encode the input string using the Punycode algorithm.
    *
    * Punycode is used to encode UTF domain name segment. A Punycode-encoded segment will be marked
    * with "xn--". Each segment is encoded separately. For instance, if you wish to encode "☂.☃.com"
    * in Punycode, you will get "xn--m3h.xn--n3h.com".
    *
    * In order to puny-encode a domain name, you must split it into its components. The following will
    * typically suffice:
    * ---
    * auto domain = "☂.☃.com";
    * auto encodedDomain = domain.splitter(".").map!(punyEncode).join(".");
    * ---
    *
    * Params:
    *   input = a single domain name segment
    *
    * Returns: the segment prefixed with "xn--" and encoded, or `input` itself when every code
    *   point is below U+0080 and nothing needs encoding.
    *
    * Throws:
    *   UrlException if the running delta exceeds `uint.max`, or if no next code point to process
    *   can be found.
    */
string punyEncode(string input)
{
    import std.array : Appender;
    ulong delta = 0;
    dchar n = initialN;
    auto bias = initialBias;
    Appender!string output;
    output ~= marker;

    // Copy the basic (ASCII) code points straight to the output. The comparison is strict:
    // initialN (U+0080) is the first code point that has to be encoded.
    size_t pushed = 0;
    size_t codePoints = 0;
    foreach (dchar c; input) {
        codePoints++;
        if (c < initialN) {
            output ~= c;
            pushed++;
        }
    }
    if (pushed == codePoints) {
        // No encoding to do.
        return input;
    }
    if (pushed > 0) {
        output ~= delimiter;
    }

    bool first = true;
    while (pushed < codePoints) {
        // Find the smallest code point that has not been handled yet.
        auto best = dchar.max;
        foreach (dchar c; input) {
            if (n <= c && c < best) {
                best = c;
            }
        }
        if (best == dchar.max) {
            throw new UrlException("failed to find a new codepoint to process during punyencode");
        }
        // Widen before multiplying so that the product cannot wrap in 32 bits and slip past the
        // overflow check below.
        delta += cast(ulong)(best - n) * (pushed + 1);
        if (delta > uint.max) {
            // TODO better error message
            throw new UrlException("overflow during punyencode");
        }
        n = best;
        foreach (dchar c; input) {
            if (c < n) {
                delta++;
            }
            if (c == n) {
                // Write delta as a variable-length integer in base 36.
                ulong q = delta;
                auto k = base;
                while (true) {
                    ulong t;
                    if (k <= bias) {
                        t = tmin;
                    } else if (k >= bias + tmax) {
                        t = tmax;
                    } else {
                        t = k - bias;
                    }
                    if (q < t) {
                        break;
                    }
                    output ~= digitToBasic(t + ((q - t) % (base - t)));
                    q = (q - t) / (base - t);
                    k += base;
                }
                output ~= digitToBasic(q);
                pushed++;
                bias = adapt(delta, pushed, first);
                first = false;
                delta = 0;
            }
        }
        delta++;
        n++;
    }
    return output.data;
}
1584 
/**
    * Decode the input string using the Punycode algorithm.
    *
    * Punycode is used to encode UTF domain name segment. A Punycode-encoded segment will be marked
    * with "xn--". Each segment is encoded separately. For instance, if you wish to encode "☂.☃.com"
    * in Punycode, you will get "xn--m3h.xn--n3h.com".
    *
    * In order to puny-decode a domain name, you must split it into its components. The following will
    * typically suffice:
    * ---
    * auto domain = "xn--m3h.xn--n3h.com";
    * auto decodedDomain = domain.splitter(".").map!(punyDecode).join(".");
    * ---
    *
    * Params:
    *   input = a single domain name segment
    *
    * Returns: the decoded segment, or `input` unchanged when it does not start with "xn--".
    *
    * Throws:
    *   UrlException if the encoded part ends in the middle of a number, contains a character that
    *   is not a Punycode digit, overflows, or decodes to something that is not a valid code point.
    */
string punyDecode(string input) {
    import std.array : insertInPlace;
    import std.utf : isValidDchar;

    if (!input.startsWith(marker)) {
        return input;
    }
    input = input[marker.length..$];

    // let n = initial_n
    // let i = 0
    // let bias = initial_bias
    // let output = an empty string indexed from 0
    // n and i are kept in 64 bits and bounded explicitly below, so that neither can wrap.
    ulong n = initialN;
    ulong i = 0;
    auto bias = initialBias;
    dchar[] output;
    // This reserves a bit more than necessary, but it should be more efficient overall than just
    // appending and inserting volo-nolo.
    output.reserve(input.length);

    // consume all code points before the last delimiter (if there is one)
    //   and copy them to output
    // if more than zero code points were consumed then consume one more
    //   (which will be the last delimiter)
    auto end = input.lastIndexOf(delimiter);
    if (end > -1) {
        foreach (dchar c; input[0..end]) {
            output ~= c;
        }
        input = input[end+1 .. $];
    }

    // while the input is not exhausted do begin
    size_t pos = 0;
    while (pos < input.length) {
        //   let oldi = i
        //   let w = 1
        auto oldi = i;
        ulong w = 1;
        //   for k = base to infinity in steps of base do begin
        for (ulong k = base; ; k += base) {
            //     consume a code point, or fail if there was none to consume
            if (pos >= input.length) {
                throw new UrlException(
                        "invalid punycode: input ends in the middle of an encoded number");
            }
            // Valid digits are all ASCII, so the input can be indexed bytewise; any other byte
            // is rejected by the digit check below.
            auto c = input[pos];
            pos++;
            //     let digit = the code point's digit-value, fail if it has none
            ulong digit = basicToDigit(c);
            if (digit >= base) {
                throw new UrlException(
                        "invalid punycode: unexpected character at index " ~ (pos - 1).to!string);
            }
            //     let i = i + digit * w, fail on overflow
            if (digit > (uint.max - i) / w) {
                throw new UrlException("overflow during punydecode");
            }
            i += digit * w;
            //     let t = tmin if k <= bias {+ tmin}, or
            //             tmax if k >= bias + tmax, or k - bias otherwise
            ulong t;
            if (k <= bias) {
                t = tmin;
            } else if (k >= bias + tmax) {
                t = tmax;
            } else {
                t = k - bias;
            }
            //     if digit < t then break
            if (digit < t) {
                break;
            }
            //     let w = w * (base - t), fail on overflow
            if (w > uint.max / (base - t)) {
                throw new UrlException("overflow during punydecode");
            }
            w *= (base - t);
            //   end
        }
        //   let bias = adapt(i - oldi, length(output) + 1, test oldi is 0?)
        bias = adapt(i - oldi, output.length + 1, oldi == 0);
        //   let n = n + i div (length(output) + 1), fail on overflow
        n += i / (output.length + 1);
        if (n > dchar.max || !isValidDchar(cast(dchar) n)) {
            throw new UrlException("invalid punycode: decoded value is not a valid code point");
        }
        //   let i = i mod (length(output) + 1)
        i %= (output.length + 1);
        //   {if n is a basic code point then fail}
        // (We aren't actually going to fail here; it's clear what this means.)
        //   insert n into output at position i
        immutable at = cast(size_t) i;
        (() @trusted { output.insertInPlace(at, cast(dchar)n); })();  // should be @safe but isn't marked
        //   increment i
        i++;
        // end
    }
    return output.to!string;
}
1682 
// Lifted from punycode.js.
/// Maps a Punycode digit value to its character: 0..25 give 'a'..'z', 26..35 give '0'..'9'.
private dchar digitToBasic(ulong digit) {
    // 'a' is 97 (= 22 + 75) and '0' - 26 is 22, so both branches equal the original formula.
    immutable ulong shift = digit < 26 ? 'a' : '0' - 26;
    return cast(dchar)(digit + shift);
}
1687 
// Lifted from punycode.js.
/**
    * Maps a character to its Punycode digit value: letters of either case give 0..25 and
    * '0'..'9' give 26..35. Any other character yields `base`, which is not a valid digit.
    */
private uint basicToDigit(char c) {
    switch (c) {
        case '0': .. case '9':
            return c - '0' + 26;
        case 'A': .. case 'Z':
            return c - 'A';
        case 'a': .. case 'z':
            return c - 'a';
        default:
            return base;
    }
}
1702 
unittest {
    // punyEncode on single labels; the expected strings are the ones this test has always pinned.

    // One non-ASCII character.
    {
        auto label = "b\u00FCcher";
        assert(punyEncode(label) == "xn--bcher-kva");
    }
    // The same non-ASCII character twice.
    {
        auto label = "b\u00FCc\u00FCher";
        assert(punyEncode(label) == "xn--bcher-kvab");
    }
    // Two different non-ASCII characters.
    {
        auto label = "ýbücher";
        auto encoded = punyEncode(label);
        assert(encoded == "xn--bcher-kvaf", encoded);
    }

    {
        auto label = "mañana";
        assert(punyEncode(label) == "xn--maana-pta");
    }

    // No ASCII characters at all, so the output has no delimiter after the marker.
    {
        auto label = "\u0644\u064A\u0647\u0645\u0627\u0628\u062A\u0643\u0644"
            ~ "\u0645\u0648\u0634\u0639\u0631\u0628\u064A\u061F";
        auto encoded = punyEncode(label);
        assert(encoded == "xn--egbpdaj6bu4bxfgehfvwxn", encoded);
    }
}
1731 
unittest {
    // punyDecode on single labels.

    // A label with no basic code points.
    auto arabic = punyDecode("xn--egbpdaj6bu4bxfgehfvwxn");
    assert(arabic == "ليهمابتكلموشعربي؟", arabic);

    // A label mixing basic code points with one encoded character.
    assert(punyDecode("xn--maana-pta") == "mañana");
}
1741 
unittest {
    // Whole domain names are handled label by label, as the punyEncode/punyDecode docs suggest.
    import std.string, std.algorithm, std.array, std.range;

    // Decoding direction.
    {
        auto encodedName = "xn--m3h.xn--n3h.com";
        auto decodedName = encodedName.splitter(".").map!(punyDecode).join(".");
        assert(decodedName == "☂.☃.com", decodedName);
    }
    // Encoding direction.
    {
        auto unicodeName = "☂.☃.com";
        auto encodedName = unicodeName.splitter(".").map!(punyEncode).join(".");
        assert(encodedName == "xn--m3h.xn--n3h.com", encodedName);
    }
}
1755