From ff5b944400e6b15c987a9197f2e2771f96f73d4f Mon Sep 17 00:00:00 2001 From: Spring Raindrop Date: Mon, 15 Aug 2022 06:45:16 +0000 Subject: [PATCH] tor: fix parsing replies Replies may contain quoted values that include spaces, newlines and/or escaped characters (including doublequote itself). Not accounting for that leads to errors when e. g. `COOKIEFILE` path contains spaces. --- tor/controller.go | 58 ++++++++++++++++++++++++++++++--------- tor/controller_test.go | 61 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 107 insertions(+), 12 deletions(-) diff --git a/tor/controller.go b/tor/controller.go index c69a568d0..a9ebcf9a3 100644 --- a/tor/controller.go +++ b/tor/controller.go @@ -10,6 +10,7 @@ import ( "fmt" "io/ioutil" "net/textproto" + "regexp" "strconv" "strings" "sync/atomic" @@ -77,6 +78,17 @@ var ( // errTCNotStarted is used when we require the tor controller to be // not stopped while it is. errTCStopped = errors.New("tor controller must not be stopped") + + // replyFieldRegexp is the regular expression used to find fields in a + // reply. Parameters within a reply should be of the form KEY=VALUE or + // KEY="VALUE", where quoted values might contain spaces, newlines and + // quoted pairs. If the parameter doesn't contain "=", then we can + // assume it doesn't provide any relevant information that isn't already + // known. Read more on this topic: + // https://gitweb.torproject.org/torspec.git/tree/control-spec.txt#n188 + replyFieldRegexp = regexp.MustCompile( + `[^" \r\n]+=(?:"(?:[^"\\]|\\[\0-\x7F])*"|[^" \r\n]*)`, + ) ) // Controller is an implementation of the Tor Control protocol. This is used in @@ -367,29 +379,51 @@ func (c *Controller) readResponse(expected int) (int, string, error) { return code, reply, nil } +// unescapeValue removes escape codes from the value in the Tor reply. A +// backslash followed by any character represents that character, so we remove +// any backslash not preceded by another backslash. +func unescapeValue(value string) string { + newString := "" + justRemovedBackslash := false + + for _, char := range value { + if char == '\\' && !justRemovedBackslash { + justRemovedBackslash = true + continue + } + + newString += string(char) + justRemovedBackslash = false + } + + return newString +} + // parseTorReply parses the reply from the Tor server after receiving a command // from a controller. This will parse the relevant reply parameters into a map // of keys and values. func parseTorReply(reply string) map[string]string { params := make(map[string]string) - // Replies can either span single or multiple lines, so we'll default - // to stripping whitespace and newlines in order to retrieve the - // individual contents of it. The -1 indicates that we want this to span - // across all instances of a newline. - contents := strings.Split(strings.Replace(reply, "\n", " ", -1), " ") + // Find all fields of a reply. The -1 indicates that we want this to + // find all instances of the regexp. + contents := replyFieldRegexp.FindAllString(reply, -1) for _, content := range contents { // Each parameter within the reply should be of the form - // "KEY=VALUE". If the parameter doesn't contain "=", then we - // can assume it does not provide any other relevant information - // already known. + // KEY=VALUE or KEY="VALUE". keyValue := strings.SplitN(content, "=", 2) - if len(keyValue) != 2 { - continue - } - key := keyValue[0] value := keyValue[1] + + // Quoted strings need extra processing. + if strings.HasPrefix(value, `"`) { + // Remove quotes around the value. + value = value[1 : len(value)-1] + + // Unescape the value. + value = unescapeValue(value) + } + params[key] = value } diff --git a/tor/controller_test.go b/tor/controller_test.go index 64c6ae8bf..cfd392f58 100644 --- a/tor/controller_test.go +++ b/tor/controller_test.go @@ -359,3 +359,64 @@ func TestReconnectSucceed(t *testing.T) { // Check that the connection has been updated. require.NotEqual(t, proxy.clientConn, c.conn) } + +// TestParseTorReply tests that Tor replies are parsed correctly. +func TestParseTorReply(t *testing.T) { + testCase := []struct { + reply string + expectedParams map[string]string + }{ + { + // Test a regular reply. + reply: `VERSION Tor="0.4.7.8"`, + expectedParams: map[string]string{ + "Tor": "0.4.7.8", + }, + }, + { + // Test a reply with multiple values, one of them + // containing spaces. + reply: `AUTH METHODS=COOKIE,SAFECOOKIE,HASHEDPASSWORD` + + ` COOKIEFILE="/path/with/spaces/Tor Browser/c` + + `ontrol_auth_cookie"`, + expectedParams: map[string]string{ + "METHODS": "COOKIE,SAFECOOKIE,HASHEDPASSWORD", + "COOKIEFILE": "/path/with/spaces/Tor Browser/" + + "control_auth_cookie", + }, + }, + { + // Test a multiline reply. + reply: "ServiceID=id\r\nOK", + expectedParams: map[string]string{"ServiceID": "id"}, + }, + { + // Test a reply with invalid parameters. + reply: "AUTH =invalid", + expectedParams: map[string]string{}, + }, + { + // Test escaping arbitrary characters. + reply: `PARAM="esca\ped \"doub\lequotes\""`, + expectedParams: map[string]string{ + `PARAM`: `escaped "doublequotes"`, + }, + }, + { + // Test escaping backslashes. Each single backslash + // should be removed, each double backslash replaced + // with a single one. Note that the single backslash + // before the space escapes the space character, so + // there's two spaces in a row. + reply: `PARAM="escaped \\ \ \\\\"`, + expectedParams: map[string]string{ + `PARAM`: `escaped \ \\`, + }, + }, + } + + for _, tc := range testCase { + params := parseTorReply(tc.reply) + require.Equal(t, tc.expectedParams, params) + } +}