diff options
author | Tobias Kortkamp <tobik@FreeBSD.org> | 2019-04-19 07:34:43 +0000 |
---|---|---|
committer | Tobias Kortkamp <tobik@FreeBSD.org> | 2019-04-19 07:34:43 +0000 |
commit | 3ed0da448ac3413a4c54fb9cab12fc82a576ef0c (patch) | |
tree | 766859a000ba3ce36a7c99627a4e508029c69547 /databases/mongodb40-tools/files/patch-common_util_file.go | |
parent | Update Flacon to version 5.3.0. (diff) |
New port: databases/mongodb40-tools
Useful utilities, written in Go, for managing a MongoDB instance.
- bsondump - Display BSON files in a human-readable format
- mongoimport - Convert data from JSON, TSV or CSV and insert them
into a collection
- mongoexport - Write an existing collection to CSV or JSON format
- mongodump/mongorestore - Dump MongoDB backups to disk in .BSON
format, or restore them to a live database
- mongostat - Monitor live MongoDB servers, replica sets, or sharded
clusters
- mongofiles - Read, write, delete, or update files in GridFS
- mongotop - Monitor read/write activity on a mongo server
- mongoreplay - Capture, observe, and replay traffic for MongoDB
WWW: https://github.com/mongodb/mongo-tools
PR: 237352
Submitted by: Andrew Shevchuk <dev.ashevchuk@gmail.com> (based on)
Diffstat (limited to 'databases/mongodb40-tools/files/patch-common_util_file.go')
-rw-r--r-- | databases/mongodb40-tools/files/patch-common_util_file.go | 325 |
1 files changed, 325 insertions, 0 deletions
diff --git a/databases/mongodb40-tools/files/patch-common_util_file.go b/databases/mongodb40-tools/files/patch-common_util_file.go new file mode 100644 index 000000000000..c80484150d59 --- /dev/null +++ b/databases/mongodb40-tools/files/patch-common_util_file.go @@ -0,0 +1,325 @@ +--- common/util/file.go.orig 2018-11-21 17:52:58 UTC ++++ common/util/file.go +@@ -9,11 +9,68 @@ + import ( + "bufio" + "io" +- "net/url" + "os" + "path/filepath" ++ "strconv" + ) + ++// Error reports an error and the operation and URL that caused it. ++type Error struct { ++ Op string ++ URL string ++ Err error ++} ++ ++func (e *Error) Error() string { return e.Op + " " + e.URL + ": " + e.Err.Error() } ++ ++func ishex(c byte) bool { ++ switch { ++ case '0' <= c && c <= '9': ++ return true ++ case 'a' <= c && c <= 'f': ++ return true ++ case 'A' <= c && c <= 'F': ++ return true ++ } ++ return false ++} ++ ++func unhex(c byte) byte { ++ switch { ++ case '0' <= c && c <= '9': ++ return c - '0' ++ case 'a' <= c && c <= 'f': ++ return c - 'a' + 10 ++ case 'A' <= c && c <= 'F': ++ return c - 'A' + 10 ++ } ++ return 0 ++} ++ ++type encoding int ++ ++const ( ++ encodePath encoding = 1 + iota ++ encodePathSegment ++ encodeHost ++ encodeZone ++ encodeUserPassword ++ encodeQueryComponent ++ encodeFragment ++) ++ ++type EscapeError string ++ ++func (e EscapeError) Error() string { ++ return "invalid URL escape " + strconv.Quote(string(e)) ++} ++ ++type InvalidHostError string ++ ++func (e InvalidHostError) Error() string { ++ return "invalid character " + strconv.Quote(string(e)) + " in host name" ++} ++ + // GetFieldsFromFile fetches the first line from the contents of the file + // at "path" + func GetFieldsFromFile(path string) ([]string, error) { +@@ -42,11 +99,11 @@ + } + + func EscapeCollectionName(collName string) string { +- return url.PathEscape(collName) ++ return PathEscape(collName) + } + + func UnescapeCollectionName(escapedCollName string) (string, error) { +- return 
url.PathUnescape(escapedCollName) ++ return PathUnescape(escapedCollName) + } + + type WrappedReadCloser struct { +@@ -76,3 +133,238 @@ + } + return innerErr + } ++ ++// Return true if the specified character should be escaped when ++// appearing in a URL string, according to RFC 3986. ++// ++// Please be informed that for now shouldEscape does not check all ++// reserved characters correctly. See golang.org/issue/5684. ++func shouldEscape(c byte, mode encoding) bool { ++ // §2.3 Unreserved characters (alphanum) ++ if 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' { ++ return false ++ } ++ ++ if mode == encodeHost || mode == encodeZone { ++ // §3.2.2 Host allows ++ // sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "=" ++ // as part of reg-name. ++ // We add : because we include :port as part of host. ++ // We add [ ] because we include [ipv6]:port as part of host. ++ // We add < > because they're the only characters left that ++ // we could possibly allow, and Parse will reject them if we ++ // escape them (because hosts can't use %-encoding for ++ // ASCII bytes). ++ switch c { ++ case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '[', ']', '<', '>', '"': ++ return false ++ } ++ } ++ ++ switch c { ++ case '-', '_', '.', '~': // §2.3 Unreserved characters (mark) ++ return false ++ ++ case '$', '&', '+', ',', '/', ':', ';', '=', '?', '@': // §2.2 Reserved characters (reserved) ++ // Different sections of the URL allow a few of ++ // the reserved characters to appear unescaped. ++ switch mode { ++ case encodePath: // §3.3 ++ // The RFC allows : @ & = + $ but saves / ; , for assigning ++ // meaning to individual path segments. This package ++ // only manipulates the path as a whole, so we allow those ++ // last three as well. That leaves only ? to escape. ++ return c == '?' 
++ ++ case encodePathSegment: // §3.3 ++ // The RFC allows : @ & = + $ but saves / ; , for assigning ++ // meaning to individual path segments. ++ return c == '/' || c == ';' || c == ',' || c == '?' ++ ++ case encodeUserPassword: // §3.2.1 ++ // The RFC allows ';', ':', '&', '=', '+', '$', and ',' in ++ // userinfo, so we must escape only '@', '/', and '?'. ++ // The parsing of userinfo treats ':' as special so we must escape ++ // that too. ++ return c == '@' || c == '/' || c == '?' || c == ':' ++ ++ case encodeQueryComponent: // §3.4 ++ // The RFC reserves (so we must escape) everything. ++ return true ++ ++ case encodeFragment: // §4.1 ++ // The RFC text is silent but the grammar allows ++ // everything, so escape nothing. ++ return false ++ } ++ } ++ ++ if mode == encodeFragment { ++ // RFC 3986 §2.2 allows not escaping sub-delims. A subset of sub-delims are ++ // included in reserved from RFC 2396 §2.2. The remaining sub-delims do not ++ // need to be escaped. To minimize potential breakage, we apply two restrictions: ++ // (1) we always escape sub-delims outside of the fragment, and (2) we always ++ // escape single quote to avoid breaking callers that had previously assumed that ++ // single quotes would be escaped. See issue #19917. ++ switch c { ++ case '!', '(', ')', '*': ++ return false ++ } ++ } ++ ++ // Everything else must be escaped. ++ return true ++} ++ ++// PathUnescape does the inverse transformation of PathEscape, ++// converting each 3-byte encoded substring of the form "%AB" into the ++// hex-decoded byte 0xAB. It returns an error if any % is not followed ++// by two hexadecimal digits. ++// ++// PathUnescape is identical to QueryUnescape except that it does not ++// unescape '+' to ' ' (space). ++func PathUnescape(s string) (string, error) { ++ return unescape(s, encodePathSegment) ++} ++ ++// unescape unescapes a string; the mode specifies ++// which section of the URL string is being unescaped. 
++func unescape(s string, mode encoding) (string, error) { ++ // Count %, check that they're well-formed. ++ n := 0 ++ hasPlus := false ++ for i := 0; i < len(s); { ++ switch s[i] { ++ case '%': ++ n++ ++ if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) { ++ s = s[i:] ++ if len(s) > 3 { ++ s = s[:3] ++ } ++ return "", EscapeError(s) ++ } ++ // Per https://tools.ietf.org/html/rfc3986#page-21 ++ // in the host component %-encoding can only be used ++ // for non-ASCII bytes. ++ // But https://tools.ietf.org/html/rfc6874#section-2 ++ // introduces %25 being allowed to escape a percent sign ++ // in IPv6 scoped-address literals. Yay. ++ if mode == encodeHost && unhex(s[i+1]) < 8 && s[i:i+3] != "%25" { ++ return "", EscapeError(s[i : i+3]) ++ } ++ if mode == encodeZone { ++ // RFC 6874 says basically "anything goes" for zone identifiers ++ // and that even non-ASCII can be redundantly escaped, ++ // but it seems prudent to restrict %-escaped bytes here to those ++ // that are valid host name bytes in their unescaped form. ++ // That is, you can use escaping in the zone identifier but not ++ // to introduce bytes you couldn't just write directly. ++ // But Windows puts spaces here! Yay. 
++ v := unhex(s[i+1])<<4 | unhex(s[i+2]) ++ if s[i:i+3] != "%25" && v != ' ' && shouldEscape(v, encodeHost) { ++ return "", EscapeError(s[i : i+3]) ++ } ++ } ++ i += 3 ++ case '+': ++ hasPlus = mode == encodeQueryComponent ++ i++ ++ default: ++ if (mode == encodeHost || mode == encodeZone) && s[i] < 0x80 && shouldEscape(s[i], mode) { ++ return "", InvalidHostError(s[i : i+1]) ++ } ++ i++ ++ } ++ } ++ ++ if n == 0 && !hasPlus { ++ return s, nil ++ } ++ ++ t := make([]byte, len(s)-2*n) ++ j := 0 ++ for i := 0; i < len(s); { ++ switch s[i] { ++ case '%': ++ t[j] = unhex(s[i+1])<<4 | unhex(s[i+2]) ++ j++ ++ i += 3 ++ case '+': ++ if mode == encodeQueryComponent { ++ t[j] = ' ' ++ } else { ++ t[j] = '+' ++ } ++ j++ ++ i++ ++ default: ++ t[j] = s[i] ++ j++ ++ i++ ++ } ++ } ++ return string(t), nil ++} ++ ++// PathEscape escapes the string so it can be safely placed ++// inside a URL path segment. ++func PathEscape(s string) string { ++ return escape(s, encodePathSegment) ++} ++ ++func escape(s string, mode encoding) string { ++ spaceCount, hexCount := 0, 0 ++ for i := 0; i < len(s); i++ { ++ c := s[i] ++ if shouldEscape(c, mode) { ++ if c == ' ' && mode == encodeQueryComponent { ++ spaceCount++ ++ } else { ++ hexCount++ ++ } ++ } ++ } ++ ++ if spaceCount == 0 && hexCount == 0 { ++ return s ++ } ++ ++ var buf [64]byte ++ var t []byte ++ ++ required := len(s) + 2*hexCount ++ if required <= len(buf) { ++ t = buf[:required] ++ } else { ++ t = make([]byte, required) ++ } ++ ++ if hexCount == 0 { ++ copy(t, s) ++ for i := 0; i < len(s); i++ { ++ if s[i] == ' ' { ++ t[i] = '+' ++ } ++ } ++ return string(t) ++ } ++ ++ j := 0 ++ for i := 0; i < len(s); i++ { ++ switch c := s[i]; { ++ case c == ' ' && mode == encodeQueryComponent: ++ t[j] = '+' ++ j++ ++ case shouldEscape(c, mode): ++ t[j] = '%' ++ t[j+1] = "0123456789ABCDEF"[c>>4] ++ t[j+2] = "0123456789ABCDEF"[c&15] ++ j += 3 ++ default: ++ t[j] = s[i] ++ j++ ++ } ++ } ++ return string(t) ++} |