From 8e84bd8cb91b657550683106225237912bbe5b0f Mon Sep 17 00:00:00 2001 From: Sven Strickroth Date: Wed, 3 Aug 2011 15:40:38 +0200 Subject: [PATCH] Update tortoiseplink to TortoiseSVN rev. 21694 Signed-off-by: Sven Strickroth --- src/TortoisePlink/LOGGING.C | 2 +- src/TortoisePlink/PUTTY.H | 46 +- src/TortoisePlink/RLOGIN.C | 93 ++- src/TortoisePlink/SETTINGS.C | 157 ++-- src/TortoisePlink/SSH.C | 189 +++-- src/TortoisePlink/SSH.H | 5 + src/TortoisePlink/SSHBN.C | 884 ++++++++++++++++++++- src/TortoisePlink/SSHRSA.C | 86 +- src/TortoisePlink/SSHZLIB.C | 3 + .../Windows/MSVC/Plink/TortoisePlink.vcproj | 12 +- src/TortoisePlink/Windows/TortoisePlink.rc | 8 +- src/TortoisePlink/Windows/WINNET.C | 2 +- src/TortoisePlink/Windows/WINPGNTC.C | 84 +- src/TortoisePlink/Windows/WINSTUFF.H | 10 +- src/TortoisePlink/Windows/wingss.c | 14 +- 15 files changed, 1388 insertions(+), 207 deletions(-) diff --git a/src/TortoisePlink/LOGGING.C b/src/TortoisePlink/LOGGING.C index 4c7aa918c..8fc5819d2 100644 --- a/src/TortoisePlink/LOGGING.C +++ b/src/TortoisePlink/LOGGING.C @@ -43,7 +43,7 @@ static void logwrite(struct LogContext *ctx, void *data, int len) bufchain_add(&ctx->queue, data, len); } else if (ctx->state == L_OPEN) { assert(ctx->lgfp); - if (fwrite(data, 1, len, ctx->lgfp) < len) { + if (fwrite(data, 1, len, ctx->lgfp) < (size_t)len) { logfclose(ctx); ctx->state = L_ERROR; /* Log state is L_ERROR so this won't cause a loop */ diff --git a/src/TortoisePlink/PUTTY.H b/src/TortoisePlink/PUTTY.H index c72d8eb76..e2baee836 100644 --- a/src/TortoisePlink/PUTTY.H +++ b/src/TortoisePlink/PUTTY.H @@ -353,12 +353,52 @@ enum { * Defined here so that backends can export their GSS library tables * to the cross-platform settings code. */ -struct keyval { char *s; int v; }; +struct keyvalwhere { + /* + * Two fields which define a string and enum value to be + * equivalent to each other. 
+ */ + char *s; + int v; + + /* + * The next pair of fields are used by gprefs() in settings.c to + * arrange that when it reads a list of strings representing a + * preference list and translates it into the corresponding list + * of integers, strings not appearing in the list are entered in a + * configurable position rather than uniformly at the end. + */ + + /* + * 'vrel' indicates which other value in the list to place this + * element relative to. It should be a value that has occurred in + * a 'v' field of some other element of the array, or -1 to + * indicate that we simply place relative to one or other end of + * the list. + * + * gprefs will try to process the elements in an order which makes + * this field work (i.e. so that the element referenced has been + * added before processing this one). + */ + int vrel; + + /* + * 'where' indicates whether to place the new value before or + * after the one referred to by vrel. -1 means before; +1 means + * after. + * + * When vrel is -1, this also implicitly indicates which end of + * the array to use. So vrel=-1, where=-1 means to place _before_ + * some end of the list (hence, at the last element); vrel=-1, + * where=+1 means to place _after_ an end (hence, at the first). 
+ */ + int where; +}; #ifndef NO_GSSAPI extern const int ngsslibs; -extern const char *const gsslibnames[];/* for displaying in configuration */ -extern const struct keyval gsslibkeywords[]; /* for storing by settings.c */ +extern const char *const gsslibnames[]; /* for displaying in configuration */ +extern const struct keyvalwhere gsslibkeywords[]; /* for settings.c */ #endif extern const char *const ttymodes[]; diff --git a/src/TortoisePlink/RLOGIN.C b/src/TortoisePlink/RLOGIN.C index b514d7a5b..0abf8cdeb 100644 --- a/src/TortoisePlink/RLOGIN.C +++ b/src/TortoisePlink/RLOGIN.C @@ -27,6 +27,11 @@ typedef struct rlogin_tag { int cansize; int term_width, term_height; void *frontend; + + Config cfg; + + /* In case we need to read a username from the terminal before starting */ + prompts_t *prompt; } *Rlogin; static void rlogin_size(void *handle, int width, int height); @@ -113,6 +118,27 @@ static void rlogin_sent(Plug plug, int bufsize) rlogin->bufsize = bufsize; } +static void rlogin_startup(Rlogin rlogin, const char *ruser) +{ + char z = 0; + char *p; + sk_write(rlogin->s, &z, 1); + sk_write(rlogin->s, rlogin->cfg.localusername, + strlen(rlogin->cfg.localusername)); + sk_write(rlogin->s, &z, 1); + sk_write(rlogin->s, ruser, + strlen(ruser)); + sk_write(rlogin->s, &z, 1); + sk_write(rlogin->s, rlogin->cfg.termtype, + strlen(rlogin->cfg.termtype)); + sk_write(rlogin->s, "/", 1); + for (p = rlogin->cfg.termspeed; isdigit((unsigned char)*p); p++) continue; + sk_write(rlogin->s, rlogin->cfg.termspeed, p - rlogin->cfg.termspeed); + rlogin->bufsize = sk_write(rlogin->s, &z, 1); + + rlogin->prompt = NULL; +} + /* * Called to set up the rlogin connection. 
* @@ -135,6 +161,7 @@ static const char *rlogin_init(void *frontend_handle, void **backend_handle, SockAddr addr; const char *err; Rlogin rlogin; + char ruser[sizeof(cfg->username)]; rlogin = snew(struct rlogin_tag); rlogin->fn = &fn_table; @@ -144,6 +171,8 @@ static const char *rlogin_init(void *frontend_handle, void **backend_handle, rlogin->term_height = cfg->height; rlogin->firstbyte = 1; rlogin->cansize = 0; + rlogin->prompt = NULL; + rlogin->cfg = *cfg; /* STRUCTURE COPY */ *backend_handle = rlogin; /* @@ -175,30 +204,6 @@ static const char *rlogin_init(void *frontend_handle, void **backend_handle, if ((err = sk_socket_error(rlogin->s)) != NULL) return err; - /* - * Send local username, remote username, terminal/speed - */ - - { - char z = 0; - char *p; - char ruser[sizeof(cfg->username)]; - (void) get_remote_username(cfg, ruser, sizeof(ruser)); - sk_write(rlogin->s, &z, 1); - sk_write(rlogin->s, cfg->localusername, - strlen(cfg->localusername)); - sk_write(rlogin->s, &z, 1); - sk_write(rlogin->s, ruser, - strlen(ruser)); - sk_write(rlogin->s, &z, 1); - sk_write(rlogin->s, cfg->termtype, - strlen(cfg->termtype)); - sk_write(rlogin->s, "/", 1); - for (p = cfg->termspeed; isdigit((unsigned char)*p); p++) continue; - sk_write(rlogin->s, cfg->termspeed, p - cfg->termspeed); - rlogin->bufsize = sk_write(rlogin->s, &z, 1); - } - if (*cfg->loghost) { char *colon; @@ -215,6 +220,28 @@ static const char *rlogin_init(void *frontend_handle, void **backend_handle, } } + /* + * Send local username, remote username, terminal type and + * terminal speed - unless we don't have the remote username yet, + * in which case we prompt for it and may end up deferring doing + * anything else until the local prompt mechanism returns. 
+ */ + if (get_remote_username(cfg, ruser, sizeof(ruser))) { + rlogin_startup(rlogin, ruser); + } else { + int ret; + + rlogin->prompt = new_prompts(rlogin->frontend); + rlogin->prompt->to_server = TRUE; + rlogin->prompt->name = dupstr("Rlogin login name"); + add_prompt(rlogin->prompt, dupstr("rlogin username: "), TRUE, + sizeof(cfg->username)); + ret = get_userpass_input(rlogin->prompt, NULL, 0); + if (ret >= 0) { + rlogin_startup(rlogin, rlogin->prompt->prompts[0]->result); + } + } + return NULL; } @@ -222,6 +249,8 @@ static void rlogin_free(void *handle) { Rlogin rlogin = (Rlogin) handle; + if (rlogin->prompt) + free_prompts(rlogin->prompt); if (rlogin->s) sk_close(rlogin->s); sfree(rlogin); @@ -244,7 +273,21 @@ static int rlogin_send(void *handle, char *buf, int len) if (rlogin->s == NULL) return 0; - rlogin->bufsize = sk_write(rlogin->s, buf, len); + if (rlogin->prompt) { + /* + * We're still prompting for a username, and aren't talking + * directly to the network connection yet. + */ + int ret = get_userpass_input(rlogin->prompt, + (unsigned char *)buf, len); + if (ret >= 0) { + rlogin_startup(rlogin, rlogin->prompt->prompts[0]->result); + /* that nulls out rlogin->prompt, so then we'll start sending + * data down the wire in the obvious way */ + } + } else { + rlogin->bufsize = sk_write(rlogin->s, buf, len); + } return rlogin->bufsize; } diff --git a/src/TortoisePlink/SETTINGS.C b/src/TortoisePlink/SETTINGS.C index 46e19f49c..b2f3ddcf9 100644 --- a/src/TortoisePlink/SETTINGS.C +++ b/src/TortoisePlink/SETTINGS.C @@ -9,21 +9,21 @@ #include "storage.h" /* The cipher order given here is the default order. 
*/ -static const struct keyval ciphernames[] = { - { "aes", CIPHER_AES }, - { "blowfish", CIPHER_BLOWFISH }, - { "3des", CIPHER_3DES }, - { "WARN", CIPHER_WARN }, - { "arcfour", CIPHER_ARCFOUR }, - { "des", CIPHER_DES } +static const struct keyvalwhere ciphernames[] = { + { "aes", CIPHER_AES, -1, -1 }, + { "blowfish", CIPHER_BLOWFISH, -1, -1 }, + { "3des", CIPHER_3DES, -1, -1 }, + { "WARN", CIPHER_WARN, -1, -1 }, + { "arcfour", CIPHER_ARCFOUR, -1, -1 }, + { "des", CIPHER_DES, -1, -1 } }; -static const struct keyval kexnames[] = { - { "dh-gex-sha1", KEX_DHGEX }, - { "dh-group14-sha1", KEX_DHGROUP14 }, - { "dh-group1-sha1", KEX_DHGROUP1 }, - { "rsa", KEX_RSA }, - { "WARN", KEX_WARN } +static const struct keyvalwhere kexnames[] = { + { "dh-gex-sha1", KEX_DHGEX, -1, -1 }, + { "dh-group14-sha1", KEX_DHGROUP14, -1, -1 }, + { "dh-group1-sha1", KEX_DHGROUP1, -1, -1 }, + { "rsa", KEX_RSA, KEX_WARN, -1 }, + { "WARN", KEX_WARN, -1, -1 } }; /* @@ -188,7 +188,8 @@ static void wmap(void *handle, char const *key, char const *value, int len) sfree(buf); } -static int key2val(const struct keyval *mapping, int nmaps, char *key) +static int key2val(const struct keyvalwhere *mapping, + int nmaps, char *key) { int i; for (i = 0; i < nmaps; i++) @@ -196,7 +197,8 @@ static int key2val(const struct keyval *mapping, int nmaps, char *key) return -1; } -static const char *val2key(const struct keyval *mapping, int nmaps, int val) +static const char *val2key(const struct keyvalwhere *mapping, + int nmaps, int val) { int i; for (i = 0; i < nmaps; i++) @@ -211,40 +213,80 @@ static const char *val2key(const struct keyval *mapping, int nmaps, int val) * XXX: assumes vals in 'mapping' are small +ve integers */ static void gprefs(void *sesskey, char *name, char *def, - const struct keyval *mapping, int nvals, + const struct keyvalwhere *mapping, int nvals, int *array) { - char commalist[80]; - char *tokarg = commalist; - int n; + char commalist[256]; + char *p, *q; + int i, j, n, v, pos; unsigned 
long seen = 0; /* bitmap for weeding dups etc */ + + /* + * Fetch the string which we'll parse as a comma-separated list. + */ gpps(sesskey, name, def, commalist, sizeof(commalist)); - /* Grotty parsing of commalist. */ + /* + * Go through that list and convert it into values. + */ n = 0; - do { - int v; - char *key; - key = strtok(tokarg, ","); /* sorry */ - tokarg = NULL; - if (!key) break; - if (((v = key2val(mapping, nvals, key)) != -1) && - !(seen & 1<= pos; j--) + array[j+1] = array[j]; + array[pos] = mapping[i].v; + n++; + } + } } } @@ -252,25 +294,34 @@ static void gprefs(void *sesskey, char *name, char *def, * Write out a preference list. */ static void wprefs(void *sesskey, char *name, - const struct keyval *mapping, int nvals, + const struct keyvalwhere *mapping, int nvals, int *array) { - char buf[80] = ""; /* XXX assumed big enough */ - int l = sizeof(buf)-1, i; - buf[l] = '\0'; - for (i = 0; l > 0 && i < nvals; i++) { + char *buf, *p; + int i, maxlen; + + for (maxlen = i = 0; i < nvals; i++) { const char *s = val2key(mapping, nvals, array[i]); if (s) { - int sl = strlen(s); - if (i > 0) { - strncat(buf, ",", l); - l--; - } - strncat(buf, s, l); - l -= sl; + maxlen += 1 + strlen(s); + } + } + + buf = snewn(maxlen, char); + p = buf; + + for (i = 0; i < nvals; i++) { + const char *s = val2key(mapping, nvals, array[i]); + if (s) { + p += sprintf(p, "%s%s", (p > buf ? 
"," : ""), s); } } + + assert(p - buf == maxlen - 1); /* maxlen counted the NUL */ + write_setting_s(sesskey, name, buf); + + sfree(buf); } char *save_settings(char *section, Config * cfg) diff --git a/src/TortoisePlink/SSH.C b/src/TortoisePlink/SSH.C index 0982f84a4..950af144b 100644 --- a/src/TortoisePlink/SSH.C +++ b/src/TortoisePlink/SSH.C @@ -544,7 +544,7 @@ static int ssh_comp_none_disable(void *handle) return 0; } const static struct ssh_compress ssh_comp_none = { - "none", + "none", NULL, ssh_comp_none_init, ssh_comp_none_cleanup, ssh_comp_none_block, ssh_comp_none_init, ssh_comp_none_cleanup, ssh_comp_none_block, ssh_comp_none_disable, NULL @@ -3744,7 +3744,9 @@ static int do_ssh1_login(Ssh ssh, unsigned char *in, int inlen, sfree(s->response); if (s->publickey_blob && !s->tried_publickey) logevent("Configured key file not in Pageant"); - } + } else { + logevent("Failed to get reply from Pageant"); + } if (s->authed) break; } @@ -5422,6 +5424,8 @@ static int do_ssh2_transport(Ssh ssh, void *vin, int inlen, int n_preferred_ciphers; const struct ssh2_ciphers *preferred_ciphers[CIPHER_MAX]; const struct ssh_compress *preferred_comp; + int userauth_succeeded; /* for delayed compression */ + int pending_compression; int got_session_id, activated_authconn; struct Packet *pktout; int dlgret; @@ -5437,6 +5441,8 @@ static int do_ssh2_transport(Ssh ssh, void *vin, int inlen, s->cscomp_tobe = s->sccomp_tobe = NULL; s->got_session_id = s->activated_authconn = FALSE; + s->userauth_succeeded = FALSE; + s->pending_compression = FALSE; /* * Be prepared to work around the buggy MAC problem. @@ -5601,26 +5607,32 @@ static int do_ssh2_transport(Ssh ssh, void *vin, int inlen, if (i < s->nmacs - 1) ssh2_pkt_addstring_str(s->pktout, ","); } - /* List client->server compression algorithms. 
*/ - ssh2_pkt_addstring_start(s->pktout); - assert(lenof(compressions) > 1); - ssh2_pkt_addstring_str(s->pktout, s->preferred_comp->name); - for (i = 0; i < lenof(compressions); i++) { - const struct ssh_compress *c = compressions[i]; - if (c != s->preferred_comp) { + /* List client->server compression algorithms, + * then server->client compression algorithms. (We use the + * same set twice.) */ + for (j = 0; j < 2; j++) { + ssh2_pkt_addstring_start(s->pktout); + assert(lenof(compressions) > 1); + /* Prefer non-delayed versions */ + ssh2_pkt_addstring_str(s->pktout, s->preferred_comp->name); + /* We don't even list delayed versions of algorithms until + * they're allowed to be used, to avoid a race. See the end of + * this function. */ + if (s->userauth_succeeded && s->preferred_comp->delayed_name) { ssh2_pkt_addstring_str(s->pktout, ","); - ssh2_pkt_addstring_str(s->pktout, c->name); + ssh2_pkt_addstring_str(s->pktout, + s->preferred_comp->delayed_name); } - } - /* List server->client compression algorithms. */ - ssh2_pkt_addstring_start(s->pktout); - assert(lenof(compressions) > 1); - ssh2_pkt_addstring_str(s->pktout, s->preferred_comp->name); - for (i = 0; i < lenof(compressions); i++) { - const struct ssh_compress *c = compressions[i]; - if (c != s->preferred_comp) { - ssh2_pkt_addstring_str(s->pktout, ","); - ssh2_pkt_addstring_str(s->pktout, c->name); + for (i = 0; i < lenof(compressions); i++) { + const struct ssh_compress *c = compressions[i]; + if (c != s->preferred_comp) { + ssh2_pkt_addstring_str(s->pktout, ","); + ssh2_pkt_addstring_str(s->pktout, c->name); + if (s->userauth_succeeded && c->delayed_name) { + ssh2_pkt_addstring_str(s->pktout, ","); + ssh2_pkt_addstring_str(s->pktout, c->delayed_name); + } + } } } /* List client->server languages. Empty list. 
*/ @@ -5769,6 +5781,13 @@ static int do_ssh2_transport(Ssh ssh, void *vin, int inlen, if (in_commasep_string(c->name, str, len)) { s->cscomp_tobe = c; break; + } else if (in_commasep_string(c->delayed_name, str, len)) { + if (s->userauth_succeeded) { + s->cscomp_tobe = c; + break; + } else { + s->pending_compression = TRUE; /* try this later */ + } } } ssh_pkt_getstring(pktin, &str, &len); /* server->client compression */ @@ -5778,8 +5797,19 @@ static int do_ssh2_transport(Ssh ssh, void *vin, int inlen, if (in_commasep_string(c->name, str, len)) { s->sccomp_tobe = c; break; + } else if (in_commasep_string(c->delayed_name, str, len)) { + if (s->userauth_succeeded) { + s->sccomp_tobe = c; + break; + } else { + s->pending_compression = TRUE; /* try this later */ + } } } + if (s->pending_compression) { + logevent("Server supports delayed compression; " + "will try this later"); + } ssh_pkt_getstring(pktin, &str, &len); /* client->server language */ ssh_pkt_getstring(pktin, &str, &len); /* server->client language */ s->ignorepkt = ssh2_pkt_getbool(pktin) && !s->guessok; @@ -6315,19 +6345,52 @@ static int do_ssh2_transport(Ssh ssh, void *vin, int inlen, * start. * * We _also_ go back to the start if we see pktin==NULL and - * inlen==-1, because this is a special signal meaning + * inlen negative, because this is a special signal meaning * `initiate client-driven rekey', and `in' contains a message * giving the reason for the rekey. + * + * inlen==-1 means always initiate a rekey; + * inlen==-2 means that userauth has completed successfully and + * we should consider rekeying (for delayed compression). */ while (!((pktin && pktin->type == SSH2_MSG_KEXINIT) || - (!pktin && inlen == -1))) { + (!pktin && inlen < 0))) { wait_for_rekey: crReturn(1); } if (pktin) { logevent("Server initiated key re-exchange"); } else { + if (inlen == -2) { + /* + * authconn has seen a USERAUTH_SUCCEEDED. Time to enable + * delayed compression, if it's available. 
+ * + * draft-miller-secsh-compression-delayed-00 says that you + * negotiate delayed compression in the first key exchange, and + * both sides start compressing when the server has sent + * USERAUTH_SUCCESS. This has a race condition -- the server + * can't know when the client has seen it, and thus which incoming + * packets it should treat as compressed. + * + * Instead, we do the initial key exchange without offering the + * delayed methods, but note if the server offers them; when we + * get here, if a delayed method was available that was higher + * on our list than what we got, we initiate a rekey in which we + * _do_ list the delayed methods (and hopefully get it as a + * result). Subsequent rekeys will do the same. + */ + assert(!s->userauth_succeeded); /* should only happen once */ + s->userauth_succeeded = TRUE; + if (!s->pending_compression) + /* Can't see any point rekeying. */ + goto wait_for_rekey; /* this is utterly horrid */ + /* else fall through to rekey... */ + s->pending_compression = FALSE; + } /* + * Now we've decided to rekey. + * * Special case: if the server bug is set that doesn't * allow rekeying, we give a different log message and * continue waiting. 
(If such a server _initiates_ a rekey, @@ -6345,7 +6408,7 @@ static int do_ssh2_transport(Ssh ssh, void *vin, int inlen, schedule_timer(ssh->cfg.ssh_rekey_time*60*TICKSPERSEC, ssh2_timer, ssh); } - goto wait_for_rekey; /* this is utterly horrid */ + goto wait_for_rekey; /* this is still utterly horrid */ } else { logeventf(ssh, "Initiating key re-exchange (%s)", (char *)in); } @@ -6564,25 +6627,44 @@ static struct ssh_channel *ssh2_channel_msg(Ssh ssh, struct Packet *pktin) return c; } +static int ssh2_handle_winadj_response(struct ssh_channel *c) +{ + struct winadj *wa = c->v.v2.winadj_head; + if (!wa) + return FALSE; + c->v.v2.winadj_head = wa->next; + c->v.v2.remlocwin += wa->size; + sfree(wa); + /* + * winadj messages are only sent when the window is fully open, so + * if we get an ack of one, we know any pending unthrottle is + * complete. + */ + if (c->v.v2.throttle_state == UNTHROTTLING) + c->v.v2.throttle_state = UNTHROTTLED; + return TRUE; +} + static void ssh2_msg_channel_success(Ssh ssh, struct Packet *pktin) { /* * This should never get called. All channel requests are either - * sent with want_reply false or are sent before this handler gets - * installed. + * sent with want_reply false, are sent before this handler gets + * installed, or are "winadj@putty" requests, which servers should + * never respond to with success. + * + * However, at least one server ("boks_sshd") is known to return + * SUCCESS for channel requests it's never heard of, such as + * "winadj@putty". Raised with foxt.com as bug 090916-090424, but + * for the sake of a quiet life, we handle it just the same as the + * expected FAILURE. 
*/ struct ssh_channel *c; - struct winadj *wa; c = ssh2_channel_msg(ssh, pktin); if (!c) return; - wa = c->v.v2.winadj_head; - if (wa) - ssh_disconnect(ssh, NULL, "Received SSH_MSG_CHANNEL_SUCCESS for " - "\"winadj@putty.projects.tartarus.org\"", - SSH2_DISCONNECT_PROTOCOL_ERROR, FALSE); - else + if (!ssh2_handle_winadj_response(c)) ssh_disconnect(ssh, NULL, "Received unsolicited SSH_MSG_CHANNEL_SUCCESS", SSH2_DISCONNECT_PROTOCOL_ERROR, FALSE); @@ -6597,28 +6679,14 @@ static void ssh2_msg_channel_failure(Ssh ssh, struct Packet *pktin) * installed. */ struct ssh_channel *c; - struct winadj *wa; c = ssh2_channel_msg(ssh, pktin); if (!c) return; - wa = c->v.v2.winadj_head; - if (!wa) { + if (!ssh2_handle_winadj_response(c)) ssh_disconnect(ssh, NULL, "Received unsolicited SSH_MSG_CHANNEL_FAILURE", SSH2_DISCONNECT_PROTOCOL_ERROR, FALSE); - return; - } - c->v.v2.winadj_head = wa->next; - c->v.v2.remlocwin += wa->size; - sfree(wa); - /* - * winadj messages are only sent when the window is fully open, so - * if we get an ack of one, we know any pending unthrottle is - * complete. 
- */ - if (c->v.v2.throttle_state == UNTHROTTLING) - c->v.v2.throttle_state = UNTHROTTLED; } static void ssh2_msg_channel_window_adjust(Ssh ssh, struct Packet *pktin) @@ -7256,7 +7324,7 @@ static void do_ssh2_authconn(Ssh ssh, unsigned char *in, int inlen, int tried_gssapi; #endif int kbd_inter_refused; - int we_are_in; + int we_are_in, userauth_success; prompts_t *cur_prompt; int num_prompts; char username[100]; @@ -7292,7 +7360,7 @@ static void do_ssh2_authconn(Ssh ssh, unsigned char *in, int inlen, crBegin(ssh->do_ssh2_authconn_crstate); s->done_service_req = FALSE; - s->we_are_in = FALSE; + s->we_are_in = s->userauth_success = FALSE; #ifndef NO_GSSAPI s->tried_gssapi = FALSE; #endif @@ -7439,6 +7507,8 @@ static void do_ssh2_authconn(Ssh ssh, unsigned char *in, int inlen, s->nkeys = 0; } } + } else { + logevent("Failed to get reply from Pageant"); } } @@ -7582,7 +7652,7 @@ static void do_ssh2_authconn(Ssh ssh, unsigned char *in, int inlen, } if (pktin->type == SSH2_MSG_USERAUTH_SUCCESS) { logevent("Access granted"); - s->we_are_in = TRUE; + s->we_are_in = s->userauth_success = TRUE; break; } @@ -8590,6 +8660,20 @@ static void do_ssh2_authconn(Ssh ssh, unsigned char *in, int inlen, if (s->agent_response) sfree(s->agent_response); + if (s->userauth_success) { + /* + * We've just received USERAUTH_SUCCESS, and we haven't sent any + * packets since. Signal the transport layer to consider enacting + * delayed compression. + * + * (Relying on we_are_in is not sufficient, as + * draft-miller-secsh-compression-delayed is quite clear that it + * triggers on USERAUTH_SUCCESS specifically, and we_are_in can + * become set for other reasons.) + */ + do_ssh2_transport(ssh, "enabling delayed compression", -2, NULL); + } + /* * Now the connection protocol has started, one way or another. 
*/ @@ -9056,10 +9140,9 @@ static void ssh2_msg_debug(Ssh ssh, struct Packet *pktin) /* log the debug message */ char *msg; int msglen; - int always_display; - /* XXX maybe we should actually take notice of this */ - always_display = ssh2_pkt_getbool(pktin); + /* XXX maybe we should actually take notice of the return value */ + ssh2_pkt_getbool(pktin); ssh_pkt_getstring(pktin, &msg, &msglen); logeventf(ssh, "Remote debug message: %.*s", msglen, msg); diff --git a/src/TortoisePlink/SSH.H b/src/TortoisePlink/SSH.H index 86c402965..605b608a4 100644 --- a/src/TortoisePlink/SSH.H +++ b/src/TortoisePlink/SSH.H @@ -251,6 +251,9 @@ struct ssh_signkey { struct ssh_compress { char *name; + /* For zlib@openssh.com: if non-NULL, this name will be considered once + * userauth has completed successfully. */ + char *delayed_name; void *(*compress_init) (void); void (*compress_cleanup) (void *); int (*compress) (void *, unsigned char *block, int len, @@ -447,6 +450,8 @@ int ssh1_write_bignum(void *data, Bignum bn); Bignum biggcd(Bignum a, Bignum b); unsigned short bignum_mod_short(Bignum number, unsigned short modulus); Bignum bignum_add_long(Bignum number, unsigned long addend); +Bignum bigadd(Bignum a, Bignum b); +Bignum bigsub(Bignum a, Bignum b); Bignum bigmul(Bignum a, Bignum b); Bignum bigmuladd(Bignum a, Bignum b, Bignum addend); Bignum bigdiv(Bignum a, Bignum b); diff --git a/src/TortoisePlink/SSHBN.C b/src/TortoisePlink/SSHBN.C index e9ff0cde4..51cecdf2b 100644 --- a/src/TortoisePlink/SSHBN.C +++ b/src/TortoisePlink/SSHBN.C @@ -51,7 +51,34 @@ typedef unsigned __int64 BignumDblInt; __asm mov r, edx \ __asm mov q, eax \ } while(0) +#elif defined _LP64 +/* 64-bit architectures can do 32x32->64 chunks at a time */ +typedef unsigned int BignumInt; +typedef unsigned long BignumDblInt; +#define BIGNUM_INT_MASK 0xFFFFFFFFU +#define BIGNUM_TOP_BIT 0x80000000U +#define BIGNUM_INT_BITS 32 +#define MUL_WORD(w1, w2) ((BignumDblInt)w1 * w2) +#define DIVMOD_WORD(q, r, hi, lo, w) do { \ + 
BignumDblInt n = (((BignumDblInt)hi) << BIGNUM_INT_BITS) | lo; \ + q = n / w; \ + r = n % w; \ +} while (0) +#elif defined _LLP64 +/* 64-bit architectures in which unsigned long is 32 bits, not 64 */ +typedef unsigned long BignumInt; +typedef unsigned long long BignumDblInt; +#define BIGNUM_INT_MASK 0xFFFFFFFFUL +#define BIGNUM_TOP_BIT 0x80000000UL +#define BIGNUM_INT_BITS 32 +#define MUL_WORD(w1, w2) ((BignumDblInt)w1 * w2) +#define DIVMOD_WORD(q, r, hi, lo, w) do { \ + BignumDblInt n = (((BignumDblInt)hi) << BIGNUM_INT_BITS) | lo; \ + q = n / w; \ + r = n % w; \ +} while (0) #else +/* Fallback for all other cases */ typedef unsigned short BignumInt; typedef unsigned long BignumDblInt; #define BIGNUM_INT_MASK 0xFFFFU @@ -133,29 +160,432 @@ Bignum bn_power_2(int n) } /* + * Internal addition. Sets c = a + b, where 'a', 'b' and 'c' are all + * big-endian arrays of 'len' BignumInts. Returns a BignumInt carried + * off the top. + */ +static BignumInt internal_add(const BignumInt *a, const BignumInt *b, + BignumInt *c, int len) +{ + int i; + BignumDblInt carry = 0; + + for (i = len-1; i >= 0; i--) { + carry += (BignumDblInt)a[i] + b[i]; + c[i] = (BignumInt)carry; + carry >>= BIGNUM_INT_BITS; + } + + return (BignumInt)carry; +} + +/* + * Internal subtraction. Sets c = a - b, where 'a', 'b' and 'c' are + * all big-endian arrays of 'len' BignumInts. Any borrow from the top + * is ignored. + */ +static void internal_sub(const BignumInt *a, const BignumInt *b, + BignumInt *c, int len) +{ + int i; + BignumDblInt carry = 1; + + for (i = len-1; i >= 0; i--) { + carry += (BignumDblInt)a[i] + (b[i] ^ BIGNUM_INT_MASK); + c[i] = (BignumInt)carry; + carry >>= BIGNUM_INT_BITS; + } +} + +/* * Compute c = a * b. * Input is in the first len words of a and b. * Result is returned in the first 2*len words of c. + * + * 'scratch' must point to an array of BignumInt of size at least + * mul_compute_scratch(len). 
(This covers the needs of internal_mul + * and all its recursive calls to itself.) */ -static void internal_mul(BignumInt *a, BignumInt *b, - BignumInt *c, int len) +#define KARATSUBA_THRESHOLD 50 +static int mul_compute_scratch(int len) { - int i, j; - BignumDblInt t; - - for (j = 0; j < 2 * len; j++) - c[j] = 0; - - for (i = len - 1; i >= 0; i--) { - t = 0; - for (j = len - 1; j >= 0; j--) { - t += MUL_WORD(a[i], (BignumDblInt) b[j]); - t += (BignumDblInt) c[i + j + 1]; - c[i + j + 1] = (BignumInt) t; - t = t >> BIGNUM_INT_BITS; - } - c[i] = (BignumInt) t; + int ret = 0; + while (len > KARATSUBA_THRESHOLD) { + int toplen = len/2, botlen = len - toplen; /* botlen is the bigger */ + int midlen = botlen + 1; + ret += 4*midlen; + len = midlen; } + return ret; +} +static void internal_mul(const BignumInt *a, const BignumInt *b, + BignumInt *c, int len, BignumInt *scratch) +{ + if (len > KARATSUBA_THRESHOLD) { + int i; + + /* + * Karatsuba divide-and-conquer algorithm. Cut each input in + * half, so that it's expressed as two big 'digits' in a giant + * base D: + * + * a = a_1 D + a_0 + * b = b_1 D + b_0 + * + * Then the product is of course + * + * ab = a_1 b_1 D^2 + (a_1 b_0 + a_0 b_1) D + a_0 b_0 + * + * and we compute the three coefficients by recursively + * calling ourself to do half-length multiplications. + * + * The clever bit that makes this worth doing is that we only + * need _one_ half-length multiplication for the central + * coefficient rather than the two that it obviously looks + * like, because we can use a single multiplication to compute + * + * (a_1 + a_0) (b_1 + b_0) = a_1 b_1 + a_1 b_0 + a_0 b_1 + a_0 b_0 + * + * and then we subtract the other two coefficients (a_1 b_1 + * and a_0 b_0) which we were computing anyway. 
+ * + * Hence we get to multiply two numbers of length N in about + * three times as much work as it takes to multiply numbers of + * length N/2, which is obviously better than the four times + * as much work it would take if we just did a long + * conventional multiply. + */ + + int toplen = len/2, botlen = len - toplen; /* botlen is the bigger */ + int midlen = botlen + 1; + BignumDblInt carry; +#ifdef KARA_DEBUG + int i; +#endif + + /* + * The coefficients a_1 b_1 and a_0 b_0 just avoid overlapping + * in the output array, so we can compute them immediately in + * place. + */ + +#ifdef KARA_DEBUG + printf("a1,a0 = 0x"); + for (i = 0; i < len; i++) { + if (i == toplen) printf(", 0x"); + printf("%0*x", BIGNUM_INT_BITS/4, a[i]); + } + printf("\n"); + printf("b1,b0 = 0x"); + for (i = 0; i < len; i++) { + if (i == toplen) printf(", 0x"); + printf("%0*x", BIGNUM_INT_BITS/4, b[i]); + } + printf("\n"); +#endif + + /* a_1 b_1 */ + internal_mul(a, b, c, toplen, scratch); +#ifdef KARA_DEBUG + printf("a1b1 = 0x"); + for (i = 0; i < 2*toplen; i++) { + printf("%0*x", BIGNUM_INT_BITS/4, c[i]); + } + printf("\n"); +#endif + + /* a_0 b_0 */ + internal_mul(a + toplen, b + toplen, c + 2*toplen, botlen, scratch); +#ifdef KARA_DEBUG + printf("a0b0 = 0x"); + for (i = 0; i < 2*botlen; i++) { + printf("%0*x", BIGNUM_INT_BITS/4, c[2*toplen+i]); + } + printf("\n"); +#endif + + /* Zero padding. midlen exceeds toplen by at most 2, so just + * zero the first two words of each input and the rest will be + * copied over. 
*/ + scratch[0] = scratch[1] = scratch[midlen] = scratch[midlen+1] = 0; + + for (i = 0; i < toplen; i++) { + scratch[midlen - toplen + i] = a[i]; /* a_1 */ + scratch[2*midlen - toplen + i] = b[i]; /* b_1 */ + } + + /* compute a_1 + a_0 */ + scratch[0] = internal_add(scratch+1, a+toplen, scratch+1, botlen); +#ifdef KARA_DEBUG + printf("a1plusa0 = 0x"); + for (i = 0; i < midlen; i++) { + printf("%0*x", BIGNUM_INT_BITS/4, scratch[i]); + } + printf("\n"); +#endif + /* compute b_1 + b_0 */ + scratch[midlen] = internal_add(scratch+midlen+1, b+toplen, + scratch+midlen+1, botlen); +#ifdef KARA_DEBUG + printf("b1plusb0 = 0x"); + for (i = 0; i < midlen; i++) { + printf("%0*x", BIGNUM_INT_BITS/4, scratch[midlen+i]); + } + printf("\n"); +#endif + + /* + * Now we can do the third multiplication. + */ + internal_mul(scratch, scratch + midlen, scratch + 2*midlen, midlen, + scratch + 4*midlen); +#ifdef KARA_DEBUG + printf("a1plusa0timesb1plusb0 = 0x"); + for (i = 0; i < 2*midlen; i++) { + printf("%0*x", BIGNUM_INT_BITS/4, scratch[2*midlen+i]); + } + printf("\n"); +#endif + + /* + * Now we can reuse the first half of 'scratch' to compute the + * sum of the outer two coefficients, to subtract from that + * product to obtain the middle one. + */ + scratch[0] = scratch[1] = scratch[2] = scratch[3] = 0; + for (i = 0; i < 2*toplen; i++) + scratch[2*midlen - 2*toplen + i] = c[i]; + scratch[1] = internal_add(scratch+2, c + 2*toplen, + scratch+2, 2*botlen); +#ifdef KARA_DEBUG + printf("a1b1plusa0b0 = 0x"); + for (i = 0; i < 2*midlen; i++) { + printf("%0*x", BIGNUM_INT_BITS/4, scratch[i]); + } + printf("\n"); +#endif + + internal_sub(scratch + 2*midlen, scratch, + scratch + 2*midlen, 2*midlen); +#ifdef KARA_DEBUG + printf("a1b0plusa0b1 = 0x"); + for (i = 0; i < 2*midlen; i++) { + printf("%0*x", BIGNUM_INT_BITS/4, scratch[2*midlen+i]); + } + printf("\n"); +#endif + + /* + * And now all we need to do is to add that middle coefficient + * back into the output. 
We may have to propagate a carry + * further up the output, but we can be sure it won't + * propagate right the way off the top. + */ + carry = internal_add(c + 2*len - botlen - 2*midlen, + scratch + 2*midlen, + c + 2*len - botlen - 2*midlen, 2*midlen); + i = 2*len - botlen - 2*midlen - 1; + while (carry) { + assert(i >= 0); + carry += c[i]; + c[i] = (BignumInt)carry; + carry >>= BIGNUM_INT_BITS; + i--; + } +#ifdef KARA_DEBUG + printf("ab = 0x"); + for (i = 0; i < 2*len; i++) { + printf("%0*x", BIGNUM_INT_BITS/4, c[i]); + } + printf("\n"); +#endif + + } else { + int i; + BignumInt carry; + BignumDblInt t; + const BignumInt *ap, *bp; + BignumInt *cp, *cps; + + /* + * Multiply in the ordinary O(N^2) way. + */ + + for (i = 0; i < 2 * len; i++) + c[i] = 0; + + for (cps = c + 2*len, ap = a + len; ap-- > a; cps--) { + carry = 0; + for (cp = cps, bp = b + len; cp--, bp-- > b ;) { + t = (MUL_WORD(*ap, *bp) + carry) + *cp; + *cp = (BignumInt) t; + carry = (BignumInt)(t >> BIGNUM_INT_BITS); + } + *cp = carry; + } + } +} + +/* + * Variant form of internal_mul used for the initial step of + * Montgomery reduction. Only bothers outputting 'len' words + * (everything above that is thrown away). + */ +static void internal_mul_low(const BignumInt *a, const BignumInt *b, + BignumInt *c, int len, BignumInt *scratch) +{ + if (len > KARATSUBA_THRESHOLD) { + int i; + + /* + * Karatsuba-aware version of internal_mul_low. As before, we + * express each input value as a shifted combination of two + * halves: + * + * a = a_1 D + a_0 + * b = b_1 D + b_0 + * + * Then the full product is, as before, + * + * ab = a_1 b_1 D^2 + (a_1 b_0 + a_0 b_1) D + a_0 b_0 + * + * Provided we choose D on the large side (so that a_0 and b_0 + * are _at least_ as long as a_1 and b_1), we don't need the + * topmost term at all, and we only need half of the middle + * term. 
So there's no point in doing the proper Karatsuba + * optimisation which computes the middle term using the top + * one, because we'd take as long computing the top one as + * just computing the middle one directly. + * + * So instead, we do a much more obvious thing: we call the + * fully optimised internal_mul to compute a_0 b_0, and we + * recursively call ourself to compute the _bottom halves_ of + * a_1 b_0 and a_0 b_1, each of which we add into the result + * in the obvious way. + * + * In other words, there's no actual Karatsuba _optimisation_ + * in this function; the only benefit in doing it this way is + * that we call internal_mul proper for a large part of the + * work, and _that_ can optimise its operation. + */ + + int toplen = len/2, botlen = len - toplen; /* botlen is the bigger */ + + /* + * Scratch space for the various bits and pieces we're going + * to be adding together: we need botlen*2 words for a_0 b_0 + * (though we may end up throwing away its topmost word), and + * toplen words for each of a_1 b_0 and a_0 b_1. That adds up + * to exactly 2*len. + */ + + /* a_0 b_0 */ + internal_mul(a + toplen, b + toplen, scratch + 2*toplen, botlen, + scratch + 2*len); + + /* a_1 b_0 */ + internal_mul_low(a, b + len - toplen, scratch + toplen, toplen, + scratch + 2*len); + + /* a_0 b_1 */ + internal_mul_low(a + len - toplen, b, scratch, toplen, + scratch + 2*len); + + /* Copy the bottom half of the big coefficient into place */ + for (i = 0; i < botlen; i++) + c[toplen + i] = scratch[2*toplen + botlen + i]; + + /* Add the two small coefficients, throwing away the returned carry */ + internal_add(scratch, scratch + toplen, scratch, toplen); + + /* And add that to the large coefficient, leaving the result in c. */ + internal_add(scratch, scratch + 2*toplen + botlen - toplen, + c, toplen); + + } else { + int i; + BignumInt carry; + BignumDblInt t; + const BignumInt *ap, *bp; + BignumInt *cp, *cps; + + /* + * Multiply in the ordinary O(N^2) way. 
+ */ + + for (i = 0; i < len; i++) + c[i] = 0; + + for (cps = c + len, ap = a + len; ap-- > a; cps--) { + carry = 0; + for (cp = cps, bp = b + len; bp--, cp-- > c ;) { + t = (MUL_WORD(*ap, *bp) + carry) + *cp; + *cp = (BignumInt) t; + carry = (BignumInt)(t >> BIGNUM_INT_BITS); + } + } + } +} + +/* + * Montgomery reduction. Expects x to be a big-endian array of 2*len + * BignumInts whose value satisfies 0 <= x < rn (where r = 2^(len * + * BIGNUM_INT_BITS) is the Montgomery base). Returns in the same array + * a value x' which is congruent to xr^{-1} mod n, and satisfies 0 <= + * x' < n. + * + * 'n' and 'mninv' should be big-endian arrays of 'len' BignumInts + * each, containing respectively n and the multiplicative inverse of + * -n mod r. + * + * 'tmp' is an array of BignumInt used as scratch space, of length at + * least 3*len + mul_compute_scratch(len). + */ +static void monty_reduce(BignumInt *x, const BignumInt *n, + const BignumInt *mninv, BignumInt *tmp, int len) +{ + int i; + BignumInt carry; + + /* + * Multiply x by (-n)^{-1} mod r. This gives us a value m such + * that mn is congruent to -x mod r. Hence, mn+x is an exact + * multiple of r, and is also (obviously) congruent to x mod n. + */ + internal_mul_low(x + len, mninv, tmp, len, tmp + 3*len); + + /* + * Compute t = (mn+x)/r in ordinary, non-modular, integer + * arithmetic. By construction this is exact, and is congruent mod + * n to x * r^{-1}, i.e. the answer we want. + * + * The following multiply leaves that answer in the _most_ + * significant half of the 'x' array, so then we must shift it + * down. + */ + internal_mul(tmp, n, tmp+len, len, tmp + 3*len); + carry = internal_add(x, tmp+len, x, 2*len); + for (i = 0; i < len; i++) + x[len + i] = x[i], x[i] = 0; + + /* + * Reduce t mod n. This doesn't require a full-on division by n, + * but merely a test and single optional subtraction, since we can + * show that 0 <= t < 2n. + * + * Proof: + * + we computed m mod r, so 0 <= m < r. 
+ * + so 0 <= mn < rn, obviously + * + hence we only need 0 <= x < rn to guarantee that 0 <= mn+x < 2rn + * + yielding 0 <= (mn+x)/r < 2n as required. + */ + if (!carry) { + for (i = 0; i < len; i++) + if (x[len + i] != n[i]) + break; + } + if (carry || i >= len || x[len + i] > n[i]) + internal_sub(x+len, n, x+len, len); } static void internal_add_shifted(BignumInt *number, @@ -279,13 +709,13 @@ static void internal_mod(BignumInt *a, int alen, } /* - * Compute (base ^ exp) % mod. + * Compute (base ^ exp) % mod, the pedestrian way. */ -Bignum modpow(Bignum base_in, Bignum exp, Bignum mod) +Bignum modpow_simple(Bignum base_in, Bignum exp, Bignum mod) { - BignumInt *a, *b, *n, *m; + BignumInt *a, *b, *n, *m, *scratch; int mshift; - int mlen, i, j; + int mlen, scratchlen, i, j; Bignum base, result; /* @@ -332,6 +762,10 @@ Bignum modpow(Bignum base_in, Bignum exp, Bignum mod) a[i] = 0; a[2 * mlen - 1] = 1; + /* Scratch space for multiplies */ + scratchlen = mul_compute_scratch(mlen); + scratch = snewn(scratchlen, BignumInt); + /* Skip leading zero bits of exp. */ i = 0; j = BIGNUM_INT_BITS-1; @@ -346,10 +780,10 @@ Bignum modpow(Bignum base_in, Bignum exp, Bignum mod) /* Main computation */ while (i < (int)exp[0]) { while (j >= 0) { - internal_mul(a + mlen, a + mlen, b, mlen); + internal_mul(a + mlen, a + mlen, b, mlen, scratch); internal_mod(b, mlen * 2, m, mlen, NULL, 0); if ((exp[exp[0] - i] & (1 << j)) != 0) { - internal_mul(b + mlen, n, a, mlen); + internal_mul(b + mlen, n, a, mlen, scratch); internal_mod(a, mlen * 2, m, mlen, NULL, 0); } else { BignumInt *t; @@ -384,6 +818,9 @@ Bignum modpow(Bignum base_in, Bignum exp, Bignum mod) for (i = 0; i < 2 * mlen; i++) a[i] = 0; sfree(a); + for (i = 0; i < scratchlen; i++) + scratch[i] = 0; + sfree(scratch); for (i = 0; i < 2 * mlen; i++) b[i] = 0; sfree(b); @@ -400,14 +837,165 @@ Bignum modpow(Bignum base_in, Bignum exp, Bignum mod) } /* + * Compute (base ^ exp) % mod. 
Uses the Montgomery multiplication
+ * technique where possible, falling back to modpow_simple otherwise.
+ */
+Bignum modpow(Bignum base_in, Bignum exp, Bignum mod)
+{
+    BignumInt *a, *b, *x, *n, *mninv, *scratch;
+    int len, scratchlen, i, j;
+    Bignum base, base2, r, rn, inv, result;
+
+    /*
+     * The most significant word of mod needs to be non-zero. It
+     * should already be, but let's make sure.
+     */
+    assert(mod[mod[0]] != 0);
+
+    /*
+     * mod had better be odd, or we can't do Montgomery multiplication
+     * using a power of two at all.
+     */
+    if (!(mod[1] & 1))
+        return modpow_simple(base_in, exp, mod);
+
+    /*
+     * Make sure the base is smaller than the modulus, by reducing
+     * it modulo the modulus if not.
+     */
+    base = bigmod(base_in, mod);
+
+    /*
+     * Compute the inverse of n mod r, for monty_reduce. (In fact we
+     * want the inverse of _minus_ n mod r, but we'll sort that out
+     * below.)
+     */
+    len = mod[0];
+    r = bn_power_2(BIGNUM_INT_BITS * len);
+    inv = modinv(mod, r);
+
+    /*
+     * Multiply the base by r mod n, to get it into Montgomery
+     * representation.
+     */
+    base2 = modmul(base, r, mod);
+    freebn(base);
+    base = base2;
+
+    rn = bigmod(r, mod);               /* r mod n, i.e. Montgomerified 1 */
+
+    freebn(r);                         /* won't need this any more */
+
+    /*
+     * Set up internal arrays of the right lengths, in big-endian
+     * format, containing the base, the modulus, and the modulus's
+     * inverse.
+     */
+    n = snewn(len, BignumInt);
+    for (j = 0; j < len; j++)
+        n[len - 1 - j] = mod[j + 1];
+
+    mninv = snewn(len, BignumInt);
+    for (j = 0; j < len; j++)
+        mninv[len - 1 - j] = (j < (int)inv[0] ? inv[j + 1] : 0);
+    freebn(inv);         /* we don't need this copy of it any more */
+    /* Now negate mninv mod r, so it's the inverse of -n rather than +n. */
+    x = snewn(len, BignumInt);
+    for (j = 0; j < len; j++)
+        x[j] = 0;
+    internal_sub(x, mninv, mninv, len);
+
+    /* x = snewn(len, BignumInt); */ /* already done above */
+    for (j = 0; j < len; j++)
+        x[len - 1 - j] = (j < (int)base[0] ? base[j + 1] : 0);
+    freebn(base);        /* we don't need this copy of it any more */
+
+    a = snewn(2*len, BignumInt);
+    b = snewn(2*len, BignumInt);
+    for (j = 0; j < len; j++) /* clear a's top half too: if exp==0 it is read unmodified */
+        a[j] = 0, a[2*len - 1 - j] = (j < (int)rn[0] ? rn[j + 1] : 0);
+    freebn(rn);
+
+    /* Scratch space for multiplies */
+    scratchlen = 3*len + mul_compute_scratch(len);
+    scratch = snewn(scratchlen, BignumInt);
+
+    /* Skip leading zero bits of exp (bit mask is built in BignumInt, not int). */
+    i = 0;
+    j = BIGNUM_INT_BITS-1;
+    while (i < (int)exp[0] && (exp[exp[0] - i] & ((BignumInt)1 << j)) == 0) {
+        j--;
+        if (j < 0) {
+            i++;
+            j = BIGNUM_INT_BITS-1;
+        }
+    }
+
+    /* Main computation: square for every bit, multiply by base on set bits */
+    while (i < (int)exp[0]) {
+        while (j >= 0) {
+            internal_mul(a + len, a + len, b, len, scratch);
+            monty_reduce(b, n, mninv, scratch, len);
+            if ((exp[exp[0] - i] & ((BignumInt)1 << j)) != 0) {
+                internal_mul(b + len, x, a, len, scratch);
+                monty_reduce(a, n, mninv, scratch, len);
+            } else {
+                BignumInt *t;
+                t = a;
+                a = b;
+                b = t;
+            }
+            j--;
+        }
+        i++;
+        j = BIGNUM_INT_BITS-1;
+    }
+
+    /*
+     * Final monty_reduce to get back from the adjusted Montgomery
+     * representation.
+     */
+    monty_reduce(a, n, mninv, scratch, len);
+
+    /* Copy result to buffer */
+    result = newbn(mod[0]);
+    for (i = 0; i < len; i++)
+        result[result[0] - i] = a[i + len];
+    while (result[0] > 1 && result[result[0]] == 0)
+        result[0]--;
+
+    /* Free temporary arrays, wiping each one first */
+    for (i = 0; i < scratchlen; i++)
+        scratch[i] = 0;
+    sfree(scratch);
+    for (i = 0; i < 2 * len; i++)
+        a[i] = 0;
+    sfree(a);
+    for (i = 0; i < 2 * len; i++)
+        b[i] = 0;
+    sfree(b);
+    for (i = 0; i < len; i++)
+        mninv[i] = 0;
+    sfree(mninv);
+    for (i = 0; i < len; i++)
+        n[i] = 0;
+    sfree(n);
+    for (i = 0; i < len; i++)
+        x[i] = 0;
+    sfree(x);
+
+    return result;
+}
+
+/*
  * Compute (p * q) % mod.
  * The most significant word of mod MUST be non-zero.
  * We assume that the result array is the same size as the mod array.
*/ Bignum modmul(Bignum p, Bignum q, Bignum mod) { - BignumInt *a, *n, *m, *o; - int mshift; + BignumInt *a, *n, *m, *o, *scratch; + int mshift, scratchlen; int pqlen, mlen, rlen, i, j; Bignum result; @@ -449,8 +1037,12 @@ Bignum modmul(Bignum p, Bignum q, Bignum mod) /* Allocate a of size 2*pqlen for result */ a = snewn(2 * pqlen, BignumInt); + /* Scratch space for multiplies */ + scratchlen = mul_compute_scratch(pqlen); + scratch = snewn(scratchlen, BignumInt); + /* Main computation */ - internal_mul(n, o, a, pqlen); + internal_mul(n, o, a, pqlen, scratch); internal_mod(a, pqlen * 2, m, mlen, NULL, 0); /* Fixup result in case the modulus was shifted */ @@ -472,6 +1064,9 @@ Bignum modmul(Bignum p, Bignum q, Bignum mod) result[0]--; /* Free temporary arrays */ + for (i = 0; i < scratchlen; i++) + scratch[i] = 0; + sfree(scratch); for (i = 0; i < 2 * pqlen; i++) a[i] = 0; sfree(a); @@ -760,18 +1355,21 @@ Bignum bigmuladd(Bignum a, Bignum b, Bignum addend) int alen = a[0], blen = b[0]; int mlen = (alen > blen ? alen : blen); int rlen, i, maxspot; + int wslen; BignumInt *workspace; Bignum ret; - /* mlen space for a, mlen space for b, 2*mlen for result */ - workspace = snewn(mlen * 4, BignumInt); + /* mlen space for a, mlen space for b, 2*mlen for result, + * plus scratch space for multiplication */ + wslen = mlen * 4 + mul_compute_scratch(mlen); + workspace = snewn(wslen, BignumInt); for (i = 0; i < mlen; i++) { workspace[0 * mlen + i] = (mlen - i <= (int)a[0] ? a[mlen - i] : 0); workspace[1 * mlen + i] = (mlen - i <= (int)b[0] ? 
b[mlen - i] : 0); } internal_mul(workspace + 0 * mlen, workspace + 1 * mlen, - workspace + 2 * mlen, mlen); + workspace + 2 * mlen, mlen, workspace + 4 * mlen); /* now just copy the result back */ rlen = alen + blen + 1; @@ -800,6 +1398,8 @@ Bignum bigmuladd(Bignum a, Bignum b, Bignum addend) } ret[0] = maxspot; + for (i = 0; i < wslen; i++) + workspace[i] = 0; sfree(workspace); return ret; } @@ -813,6 +1413,69 @@ Bignum bigmul(Bignum a, Bignum b) } /* + * Simple addition. + */ +Bignum bigadd(Bignum a, Bignum b) +{ + int alen = a[0], blen = b[0]; + int rlen = (alen > blen ? alen : blen) + 1; + int i, maxspot; + Bignum ret; + BignumDblInt carry; + + ret = newbn(rlen); + + carry = 0; + maxspot = 0; + for (i = 1; i <= rlen; i++) { + carry += (i <= (int)a[0] ? a[i] : 0); + carry += (i <= (int)b[0] ? b[i] : 0); + ret[i] = (BignumInt) carry & BIGNUM_INT_MASK; + carry >>= BIGNUM_INT_BITS; + if (ret[i] != 0 && i > maxspot) + maxspot = i; + } + ret[0] = maxspot; + + return ret; +} + +/* + * Subtraction. Returns a-b, or NULL if the result would come out + * negative (recall that this entire bignum module only handles + * positive numbers). + */ +Bignum bigsub(Bignum a, Bignum b) +{ + int alen = a[0], blen = b[0]; + int rlen = (alen > blen ? alen : blen); + int i, maxspot; + Bignum ret; + BignumDblInt carry; + + ret = newbn(rlen); + + carry = 1; + maxspot = 0; + for (i = 1; i <= rlen; i++) { + carry += (i <= (int)a[0] ? a[i] : 0); + carry += (i <= (int)b[0] ? b[i] ^ BIGNUM_INT_MASK : BIGNUM_INT_MASK); + ret[i] = (BignumInt) carry & BIGNUM_INT_MASK; + carry >>= BIGNUM_INT_BITS; + if (ret[i] != 0 && i > maxspot) + maxspot = i; + } + ret[0] = maxspot; + + if (!carry) { + freebn(ret); + return NULL; + } + + return ret; +} + +/* * Create a bignum which is the bitmask covering another one. That * is, the smallest integer which is >= N and is also one less than * a power of two. 
@@ -1090,3 +1753,176 @@ char *bignum_decimal(Bignum x)
     sfree(workspace);
     return ret;
 }
+
+#ifdef TESTBN
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+
+/*
+ * gcc -g -O0 -DTESTBN -o testbn sshbn.c misc.c -I unix -I charset
+ *
+ * Then feed to this program's standard input the output of
+ * testdata/bignum.py .
+ */
+
+void modalfatalbox(char *p, ...)
+{
+    va_list ap;
+    fprintf(stderr, "FATAL ERROR: ");
+    va_start(ap, p);
+    vfprintf(stderr, p, ap);
+    va_end(ap);
+    fputc('\n', stderr);
+    exit(1);
+}
+
+#define fromxdigit(c) ( (c)>'9' ? ((c)&0xDF) - 'A' + 10 : (c) - '0' )
+
+/*
+ * Test driver. Each input line is a keyword ("mul" or "pow")
+ * followed by hex-encoded operands, the last of which is the
+ * expected result. Returns non-zero if any test failed.
+ */
+int main(int argc, char **argv)
+{
+    char *buf;
+    int line = 0;
+    int passes = 0, fails = 0;
+
+    while ((buf = fgetline(stdin)) != NULL) {
+        int maxlen = strlen(buf);
+        unsigned char *data = snewn(maxlen, unsigned char);
+        unsigned char *ptrs[5], *q;
+        int ptrnum;
+        char *bufp = buf;
+
+        line++;
+
+        q = data;
+        ptrnum = 0;
+
+        /* Split off the keyword. Only step past the terminator if we
+         * stopped on whitespace rather than at the end of the line. */
+        while (*bufp && !isspace((unsigned char)*bufp))
+            bufp++;
+        if (*bufp)
+            *bufp++ = '\0';
+
+        while (*bufp) {
+            char *start, *end;
+            int i;
+
+            while (*bufp && !isxdigit((unsigned char)*bufp))
+                bufp++;
+            start = bufp;
+
+            if (!*bufp)
+                break;
+
+            while (*bufp && isxdigit((unsigned char)*bufp))
+                bufp++;
+            end = bufp;
+
+            if (ptrnum >= lenof(ptrs))
+                break;
+            ptrs[ptrnum++] = q;
+
+            for (i = -((end - start) & 1); i < end-start; i += 2) {
+                unsigned char val = (i < 0 ? 0 : fromxdigit(start[i]));
+                val = val * 16 + fromxdigit(start[i+1]);
+                *q++ = val;
+            }
+
+            /* ptrs[ptrnum] marks the end of the operand just parsed;
+             * skip the store once the table is full to stay in bounds. */
+            if (ptrnum < (int)lenof(ptrs))
+                ptrs[ptrnum] = q;
+        }
+
+        if (!strcmp(buf, "mul")) {
+            Bignum a, b, c, p;
+
+            if (ptrnum != 3) {
+                printf("%d: mul with %d parameters, expected 3\n", line, ptrnum);
+                exit(1);
+            }
+            a = bignum_from_bytes(ptrs[0], ptrs[1]-ptrs[0]);
+            b = bignum_from_bytes(ptrs[1], ptrs[2]-ptrs[1]);
+            c = bignum_from_bytes(ptrs[2], ptrs[3]-ptrs[2]);
+            p = bigmul(a, b);
+
+            if (bignum_cmp(c, p) == 0) {
+                passes++;
+            } else {
+                char *as = bignum_decimal(a);
+                char *bs = bignum_decimal(b);
+                char *cs = bignum_decimal(c);
+                char *ps = bignum_decimal(p);
+
+                printf("%d: fail: %s * %s gave %s expected %s\n",
+                       line, as, bs, ps, cs);
+                fails++;
+
+                sfree(as);
+                sfree(bs);
+                sfree(cs);
+                sfree(ps);
+            }
+            freebn(a);
+            freebn(b);
+            freebn(c);
+            freebn(p);
+        } else if (!strcmp(buf, "pow")) {
+            Bignum base, expt, modulus, expected, answer;
+
+            if (ptrnum != 4) {
+                printf("%d: pow with %d parameters, expected 4\n", line, ptrnum);
+                exit(1);
+            }
+
+            base = bignum_from_bytes(ptrs[0], ptrs[1]-ptrs[0]);
+            expt = bignum_from_bytes(ptrs[1], ptrs[2]-ptrs[1]);
+            modulus = bignum_from_bytes(ptrs[2], ptrs[3]-ptrs[2]);
+            expected = bignum_from_bytes(ptrs[3], ptrs[4]-ptrs[3]);
+            answer = modpow(base, expt, modulus);
+
+            if (bignum_cmp(expected, answer) == 0) {
+                passes++;
+            } else {
+                char *as = bignum_decimal(base);
+                char *bs = bignum_decimal(expt);
+                char *cs = bignum_decimal(modulus);
+                char *ds = bignum_decimal(answer);
+                char *ps = bignum_decimal(expected);
+
+                printf("%d: fail: %s ^ %s mod %s gave %s expected %s\n",
+                       line, as, bs, cs, ds, ps);
+                fails++;
+
+                sfree(as);
+                sfree(bs);
+                sfree(cs);
+                sfree(ds);
+                sfree(ps);
+            }
+            freebn(base);
+            freebn(expt);
+            freebn(modulus);
+            freebn(expected);
+            freebn(answer);
+        } else {
+            printf("%d: unrecognised test keyword: '%s'\n", line, buf);
+            exit(1);
+        }
+
+        sfree(buf);
+        sfree(data);
+    }
+
+    printf("passed %d failed %d total %d\n", passes, fails, passes+fails);
+    return fails != 0;
+} + +#endif diff --git a/src/TortoisePlink/SSHRSA.C b/src/TortoisePlink/SSHRSA.C index d06e9d6f4..ea6440bc5 100644 --- a/src/TortoisePlink/SSHRSA.C +++ b/src/TortoisePlink/SSHRSA.C @@ -114,9 +114,83 @@ static void sha512_mpint(SHA512_State * s, Bignum b) } /* - * This function is a wrapper on modpow(). It has the same effect - * as modpow(), but employs RSA blinding to protect against timing - * attacks. + * Compute (base ^ exp) % mod, provided mod == p * q, with p,q + * distinct primes, and iqmp is the multiplicative inverse of q mod p. + * Uses Chinese Remainder Theorem to speed computation up over the + * obvious implementation of a single big modpow. + */ +Bignum crt_modpow(Bignum base, Bignum exp, Bignum mod, + Bignum p, Bignum q, Bignum iqmp) +{ + Bignum pm1, qm1, pexp, qexp, presult, qresult, diff, multiplier, ret0, ret; + + /* + * Reduce the exponent mod phi(p) and phi(q), to save time when + * exponentiating mod p and mod q respectively. Of course, since p + * and q are prime, phi(p) == p-1 and similarly for q. + */ + pm1 = copybn(p); + decbn(pm1); + qm1 = copybn(q); + decbn(qm1); + pexp = bigmod(exp, pm1); + qexp = bigmod(exp, qm1); + + /* + * Do the two modpows. + */ + presult = modpow(base, pexp, p); + qresult = modpow(base, qexp, q); + + /* + * Recombine the results. We want a value which is congruent to + * qresult mod q, and to presult mod p. + * + * We know that iqmp * q is congruent to 1 * mod p (by definition + * of iqmp) and to 0 mod q (obviously). So we start with qresult + * (which is congruent to qresult mod both primes), and add on + * (presult-qresult) * (iqmp * q) which adjusts it to be congruent + * to presult mod p without affecting its value mod q. + */ + if (bignum_cmp(presult, qresult) < 0) { + /* + * Can't subtract presult from qresult without first adding on + * p. 
+ */ + Bignum tmp = presult; + presult = bigadd(presult, p); + freebn(tmp); + } + diff = bigsub(presult, qresult); + multiplier = bigmul(iqmp, q); + ret0 = bigmuladd(multiplier, diff, qresult); + + /* + * Finally, reduce the result mod n. + */ + ret = bigmod(ret0, mod); + + /* + * Free all the intermediate results before returning. + */ + freebn(pm1); + freebn(qm1); + freebn(pexp); + freebn(qexp); + freebn(presult); + freebn(qresult); + freebn(diff); + freebn(multiplier); + freebn(ret0); + + return ret; +} + +/* + * This function is a wrapper on modpow(). It has the same effect as + * modpow(), but employs RSA blinding to protect against timing + * attacks and also uses the Chinese Remainder Theorem (implemented + * above, in crt_modpow()) to speed up the main operation. */ static Bignum rsa_privkey_op(Bignum input, struct RSAKey *key) { @@ -218,10 +292,12 @@ static Bignum rsa_privkey_op(Bignum input, struct RSAKey *key) * _y^d_, and use the _public_ exponent to compute (y^d)^e = y * from it, which is much faster to do. 
*/ - random_encrypted = modpow(random, key->exponent, key->modulus); + random_encrypted = crt_modpow(random, key->exponent, + key->modulus, key->p, key->q, key->iqmp); random_inverse = modinv(random, key->modulus); input_blinded = modmul(input, random_encrypted, key->modulus); - ret_blinded = modpow(input_blinded, key->private_exponent, key->modulus); + ret_blinded = crt_modpow(input_blinded, key->private_exponent, + key->modulus, key->p, key->q, key->iqmp); ret = modmul(ret_blinded, random_inverse, key->modulus); freebn(ret_blinded); diff --git a/src/TortoisePlink/SSHZLIB.C b/src/TortoisePlink/SSHZLIB.C index 7d37141c7..9c780a41f 100644 --- a/src/TortoisePlink/SSHZLIB.C +++ b/src/TortoisePlink/SSHZLIB.C @@ -1259,6 +1259,8 @@ int zlib_decompress_block(void *handle, unsigned char *block, int len, goto finished; nlen = dctx->bits & 0xFFFF; EATBITS(16); + if (dctx->uncomplen != (nlen ^ 0xFFFF)) + goto decode_error; if (dctx->uncomplen == 0) dctx->state = OUTSIDEBLK; /* block is empty */ else @@ -1369,6 +1371,7 @@ int main(int argc, char **argv) const struct ssh_compress ssh_zlib = { "zlib", + "zlib@openssh.com", /* delayed version */ zlib_compress_init, zlib_compress_cleanup, zlib_compress_block, diff --git a/src/TortoisePlink/Windows/MSVC/Plink/TortoisePlink.vcproj b/src/TortoisePlink/Windows/MSVC/Plink/TortoisePlink.vcproj index 864bd81df..9f4f1bee1 100644 --- a/src/TortoisePlink/Windows/MSVC/Plink/TortoisePlink.vcproj +++ b/src/TortoisePlink/Windows/MSVC/Plink/TortoisePlink.vcproj @@ -51,7 +51,7 @@ = len-1); + } while (strlen(hostname) >= (size_t)(len-1)); return hostname; } diff --git a/src/TortoisePlink/Windows/WINPGNTC.C b/src/TortoisePlink/Windows/WINPGNTC.C index 2a5aa734f..0dabe7167 100644 --- a/src/TortoisePlink/Windows/WINPGNTC.C +++ b/src/TortoisePlink/Windows/WINPGNTC.C @@ -86,15 +86,70 @@ DECL_WINDOWS_FUNCTION(static, BOOL, InitializeSecurityDescriptor, (PSECURITY_DESCRIPTOR, DWORD)); DECL_WINDOWS_FUNCTION(static, BOOL, SetSecurityDescriptorOwner, 
(PSECURITY_DESCRIPTOR, PSID, BOOL)); -static int init_advapi(void) +DECL_WINDOWS_FUNCTION(, DWORD, GetSecurityInfo, + (HANDLE, SE_OBJECT_TYPE, SECURITY_INFORMATION, + PSID *, PSID *, PACL *, PACL *, + PSECURITY_DESCRIPTOR *)); +int init_advapi(void) { advapi = load_system32_dll("advapi32.dll"); return advapi && + GET_WINDOWS_FUNCTION(advapi, GetSecurityInfo) && GET_WINDOWS_FUNCTION(advapi, OpenProcessToken) && GET_WINDOWS_FUNCTION(advapi, GetTokenInformation) && GET_WINDOWS_FUNCTION(advapi, InitializeSecurityDescriptor) && GET_WINDOWS_FUNCTION(advapi, SetSecurityDescriptorOwner); } + +PSID get_user_sid(void) +{ + HANDLE proc = NULL, tok = NULL; + TOKEN_USER *user = NULL; + DWORD toklen, sidlen; + PSID sid = NULL, ret = NULL; + + if ((proc = OpenProcess(MAXIMUM_ALLOWED, FALSE, + GetCurrentProcessId())) == NULL) + goto cleanup; + + if (!p_OpenProcessToken(proc, TOKEN_QUERY, &tok)) + goto cleanup; + + if (!p_GetTokenInformation(tok, TokenUser, NULL, 0, &toklen) && + GetLastError() != ERROR_INSUFFICIENT_BUFFER) + goto cleanup; + + if ((user = (TOKEN_USER *)LocalAlloc(LPTR, toklen)) == NULL) + goto cleanup; + + if (!p_GetTokenInformation(tok, TokenUser, user, toklen, &toklen)) + goto cleanup; + + sidlen = GetLengthSid(user->User.Sid); + + sid = (PSID)smalloc(sidlen); + + if (!CopySid(sidlen, sid, user->User.Sid)) + goto cleanup; + + /* Success. Move sid into the return value slot, and null it out + * to stop the cleanup code freeing it. 
*/ + ret = sid; + sid = NULL; + + cleanup: + if (proc != NULL) + CloseHandle(proc); + if (tok != NULL) + CloseHandle(tok); + if (user != NULL) + LocalFree(user); + if (sid != NULL) + sfree(sid); + + return ret; +} + #endif int agent_query(void *in, int inlen, void **out, int *outlen, @@ -108,8 +163,7 @@ int agent_query(void *in, int inlen, void **out, int *outlen, COPYDATASTRUCT cds; SECURITY_ATTRIBUTES sa, *psa; PSECURITY_DESCRIPTOR psd = NULL; - HANDLE proc, tok; - TOKEN_USER *user = NULL; + PSID usersid = NULL; *out = NULL; *outlen = 0; @@ -130,31 +184,16 @@ int agent_query(void *in, int inlen, void **out, int *outlen, * run PSFTPs which refer back to the owning user's * unprivileged Pageant. */ - - if ((proc = OpenProcess(MAXIMUM_ALLOWED, FALSE, - GetCurrentProcessId())) != NULL) { - if (p_OpenProcessToken(proc, TOKEN_QUERY, &tok)) { - DWORD retlen; - p_GetTokenInformation(tok, TokenUser, NULL, 0, &retlen); - user = (TOKEN_USER *)LocalAlloc(LPTR, retlen); - if (!p_GetTokenInformation(tok, TokenUser, - user, retlen, &retlen)) { - LocalFree(user); - user = NULL; - } - CloseHandle(tok); - } - CloseHandle(proc); - } + usersid = get_user_sid(); psa = NULL; - if (user) { + if (usersid) { psd = (PSECURITY_DESCRIPTOR) LocalAlloc(LPTR, SECURITY_DESCRIPTOR_MIN_LENGTH); if (psd) { if (p_InitializeSecurityDescriptor (psd, SECURITY_DESCRIPTOR_REVISION) && - p_SetSecurityDescriptorOwner(psd, user->User.Sid, FALSE)) { + p_SetSecurityDescriptorOwner(psd, usersid, FALSE)) { sa.nLength = sizeof(sa); sa.bInheritHandle = TRUE; sa.lpSecurityDescriptor = psd; @@ -221,7 +260,6 @@ int agent_query(void *in, int inlen, void **out, int *outlen, CloseHandle(filemap); if (psd) LocalFree(psd); - if (user) - LocalFree(user); + sfree(usersid); return 1; } diff --git a/src/TortoisePlink/Windows/WINSTUFF.H b/src/TortoisePlink/Windows/WINSTUFF.H index 1cc48348e..81890a81f 100644 --- a/src/TortoisePlink/Windows/WINSTUFF.H +++ b/src/TortoisePlink/Windows/WINSTUFF.H @@ -497,7 +497,7 @@ int 
handle_backlog(struct handle *h); void *handle_get_privdata(struct handle *h); /* - * pageantc.c needs to schedule callbacks for asynchronous agent + * winpgntc.c needs to schedule callbacks for asynchronous agent * requests. This has to be done differently in GUI and console, so * there's an exported function used for the purpose. * @@ -509,6 +509,14 @@ void agent_schedule_callback(void (*callback)(void *, void *, int), #define FLAG_SYNCAGENT 0x1000 /* + * winpgntc.c also exports these two functions which are used by the + * server side of Pageant as well, to get the user SID for comparing + * with clients'. + */ +int init_advapi(void); /* initialises everything needed by get_user_sid */ +PSID get_user_sid(void); + +/* * Exports from winser.c. */ extern Backend serial_backend; diff --git a/src/TortoisePlink/Windows/wingss.c b/src/TortoisePlink/Windows/wingss.c index 4efad5d58..97198188c 100644 --- a/src/TortoisePlink/Windows/wingss.c +++ b/src/TortoisePlink/Windows/wingss.c @@ -18,10 +18,10 @@ const char *const gsslibnames[3] = { "Microsoft SSPI SECUR32.DLL", "User-specified GSSAPI DLL", }; -const struct keyval gsslibkeywords[] = { - { "gssapi32", 0 }, - { "sspi", 1 }, - { "custom", 2 }, +const struct keyvalwhere gsslibkeywords[] = { + { "gssapi32", 0, -1, -1 }, + { "sspi", 1, -1, -1 }, + { "custom", 2, -1, -1 }, }; DECL_WINDOWS_FUNCTION(static, SECURITY_STATUS, @@ -91,7 +91,11 @@ struct ssh_gss_liblist *ssh_gss_setup(const Config *cfg) ret = RegQueryValueEx(regkey, "InstallDir", NULL, &type, buffer, &size); if (ret == ERROR_SUCCESS && type == REG_SZ) { - strcat(buffer, "\\bin\\gssapi32.dll"); +#ifdef _WIN64 + strcat(buffer, "\\bin\\gssapi64.dll"); +#else + strcat(buffer, "\\bin\\gssapi32.dll"); +#endif module = LoadLibrary(buffer); } sfree(buffer); -- 2.11.4.GIT