1 /* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 
3 #include "proxy.h"
4 
// Furthest offset into a request the tokenizer will index. Parenthesized so
// the macro expands as a single value in any surrounding expression.
#define PARSER_MAXLEN (USHRT_MAX-1)
6 
7 // Find the starting offsets of each token; ignoring length.
8 // This creates a fast small (<= cacheline) index into the request,
9 // where we later scan or directly feed data into API's.
_process_tokenize(mcp_parser_t * pr,const size_t max)10 static int _process_tokenize(mcp_parser_t *pr, const size_t max) {
11     const char *s = pr->request;
12     int len = pr->endlen;
13 
14     // since multigets can be huge, we can't purely judge reqlen against this
15     // limit, but we also can't index past it since the tokens are shorts.
16     if (len > PARSER_MAXLEN) {
17         len = PARSER_MAXLEN;
18     }
19     const char *end = s + len;
20     int curtoken = 0;
21 
22     int state = 0;
23     while (s != end) {
24         switch (state) {
25             case 0:
26                 // scanning for first non-space to find a token.
27                 if (*s != ' ') {
28                     pr->tokens[curtoken] = s - pr->request;
29                     if (++curtoken == max) {
30                         s++;
31                         state = 2;
32                         break;
33                     }
34                     state = 1;
35                 }
36                 s++;
37                 break;
38             case 1:
39                 // advance over a token
40                 if (*s != ' ') {
41                     s++;
42                 } else {
43                     state = 0;
44                 }
45                 break;
46             case 2:
47                 // hit max tokens before end of the line.
48                 // keep advancing so we can place endcap token.
49                 if (*s == ' ') {
50                     goto endloop;
51                 }
52                 s++;
53                 break;
54         }
55     }
56 endloop:
57 
58     // endcap token so we can quickly find the length of any token by looking
59     // at the next one.
60     pr->tokens[curtoken] = s - pr->request;
61     pr->ntokens = curtoken;
62     P_DEBUG("%s: cur_tokens: %d\n", __func__, curtoken);
63 
64     return 0;
65 }
66 
// Length of token number `token`, excluding the spaces that separate it from
// whatever follows.
static int _process_token_len(mcp_parser_t *pr, size_t token) {
    const char *first = pr->request + pr->tokens[token];
    // The next index entry (or the endcap) sits past any delimiter spaces,
    // so walk backwards over them.
    const char *last = pr->request + pr->tokens[token+1];
    for (; last[-1] == ' '; last--) {
    }
    return last - first;
}
76 
// Record the key length for the token selected by pr->keytoken and note
// whether the byte right after the key is a space. Always returns 0.
static int _process_request_key(mcp_parser_t *pr) {
    pr->klen = _process_token_len(pr, pr->keytoken);
    // Move the parse position one byte past the key so a multikey request
    // can continue from there.
    pr->parsed = pr->tokens[pr->keytoken] + pr->klen + 1;

    const bool trailing_space = (pr->request[pr->parsed-1] == ' ');
    if (trailing_space) {
        P_DEBUG("%s: request_key found extra space\n", __func__);
    }
    pr->has_space = trailing_space;
    return 0;
}
90 
91 // Just for ascii multiget: search for next "key" beyond where we stopped
92 // tokenizing before.
93 // Returns the offset for the next key.
_process_request_next_key(mcp_parser_t * pr)94 size_t _process_request_next_key(mcp_parser_t *pr) {
95     const char *cur = pr->request + pr->parsed;
96     int remain = pr->endlen - pr->parsed;
97 
98     // chew off any leading whitespace.
99     while (remain) {
100         if (*cur == ' ') {
101             remain--;
102             cur++;
103             pr->parsed++;
104         } else {
105             break;
106         }
107     }
108 
109     const char *s = memchr(cur, ' ', remain);
110     if (s != NULL) {
111         pr->klen = s - cur;
112         pr->parsed += s - cur;
113     } else {
114         pr->klen = remain;
115         pr->parsed += remain;
116     }
117 
118     return cur - pr->request;
119 }
120 
121 // for fast testing of existence of meta flags.
122 // meta has all flags as final tokens
_process_request_metaflags(mcp_parser_t * pr,int token)123 static int _process_request_metaflags(mcp_parser_t *pr, int token) {
124     if (pr->ntokens <= token) {
125         pr->t.meta.flags = 0; // no flags found.
126         return 0;
127     }
128     const char *cur = pr->request + pr->tokens[token];
129     const char *end = pr->request + pr->endlen;
130 
131     // We blindly convert flags into bits, since the range of possible
132     // flags is deliberately < 64.
133     int state = 0;
134     while (cur != end) {
135         switch (state) {
136             case 0:
137                 if (*cur == ' ') {
138                     cur++;
139                 } else {
140                     if (*cur < 65 || *cur > 122) {
141                         return -1;
142                     }
143                     P_DEBUG("%s: setting meta flag: %d\n", __func__, *cur - 65);
144                     pr->t.meta.flags |= (uint64_t)1 << (*cur - 65);
145                     state = 1;
146                 }
147                 break;
148             case 1:
149                 if (*cur != ' ') {
150                     cur++;
151                 } else {
152                     state = 0;
153                 }
154                 break;
155         }
156     }
157 
158     // not too great hack for noreply detection: this can be flattened out
159     // once a few other contexts are fixed and we detect the noreply from the
160     // coroutine start instead.
161     if (pr->t.meta.flags & ((uint64_t)1 << 48)) {
162         pr->noreply = true;
163     }
164 
165     return 0;
166 }
167 
168 // All meta commands are of form: "cm key f l a g S100"
_process_request_meta(mcp_parser_t * pr)169 static int _process_request_meta(mcp_parser_t *pr) {
170     _process_tokenize(pr, PARSER_MAX_TOKENS);
171     if (pr->ntokens < 2) {
172         P_DEBUG("%s: not enough tokens for meta command: %d\n", __func__, pr->ntokens);
173         return -1;
174     }
175     pr->keytoken = 1;
176     _process_request_key(pr);
177 
178     // pass the first flag token.
179     return _process_request_metaflags(pr, 2);
180 }
181 
182 // ms <key> <datalen> <flags>*\r\n
_process_request_mset(mcp_parser_t * pr)183 static int _process_request_mset(mcp_parser_t *pr) {
184     _process_tokenize(pr, PARSER_MAX_TOKENS);
185     if (pr->ntokens < 3) {
186         P_DEBUG("%s: not enough tokens for meta set command: %d\n", __func__, pr->ntokens);
187         return -1;
188     }
189     pr->keytoken = 1;
190     _process_request_key(pr);
191 
192     const char *cur = pr->request + pr->tokens[2];
193 
194     errno = 0;
195     char *n = NULL;
196     int vlen = strtol(cur, &n, 10);
197     if ((errno == ERANGE) || (cur == n)) {
198         return -1;
199     }
200 
201     if (vlen < 0 || vlen > (INT_MAX - 2)) {
202        return -1;
203     }
204     vlen += 2;
205 
206     pr->vlen = vlen;
207 
208     // pass the first flag token
209     return _process_request_metaflags(pr, 3);
210 }
211 
212 // gat[s] <exptime> <key>*\r\n
_process_request_gat(mcp_parser_t * pr)213 static int _process_request_gat(mcp_parser_t *pr) {
214     _process_tokenize(pr, 3);
215     if (pr->ntokens < 3) {
216         P_DEBUG("%s: not enough tokens for GAT: %d\n", __func__, pr->ntokens);
217         return -1;
218     }
219 
220     pr->keytoken = 2;
221     _process_request_key(pr);
222     return 0;
223 }
224 
#define NOREPLYSTR "noreply"
// Length of NOREPLYSTR without its terminator. Parenthesized so the macro
// expands as a single value in any surrounding expression.
#define NOREPLYLEN (sizeof(NOREPLYSTR)-1)
227 // given a tokenized parser for a normal ASCII command, checks for noreply
228 // mode.
_process_request_noreply(mcp_parser_t * pr)229 static int _process_request_noreply(mcp_parser_t *pr) {
230     if (pr->tokens[pr->ntokens] - pr->tokens[pr->ntokens-1] >= NOREPLYLEN
231             && strncmp(NOREPLYSTR, pr->request + pr->tokens[pr->ntokens-1], NOREPLYLEN) == 0) {
232         pr->noreply = true;
233     }
234     return 0;
235 }
236 
237 // we need t find the bytes supplied immediately so we can read the request
238 // from the client properly.
239 // set <key> <flags> <exptime> <bytes> [noreply]\r\n
_process_request_storage(mcp_parser_t * pr,size_t max)240 static int _process_request_storage(mcp_parser_t *pr, size_t max) {
241     _process_tokenize(pr, max);
242     if (pr->ntokens < 5) {
243         P_DEBUG("%s: not enough tokens to storage command: %d\n", __func__, pr->ntokens);
244         return -1;
245     }
246     pr->keytoken = 1;
247     _process_request_key(pr);
248 
249     errno = 0;
250     char *n = NULL;
251     const char *cur = pr->request + pr->tokens[4];
252 
253     int vlen = strtol(cur, &n, 10);
254     if ((errno == ERANGE) || (cur == n)) {
255         return -1;
256     }
257 
258     if (vlen < 0 || vlen > (INT_MAX - 2)) {
259        return -1;
260     }
261     vlen += 2;
262 
263     pr->vlen = vlen;
264 
265     return _process_request_noreply(pr);
266 }
267 
268 // common request with key: <cmd> <key> <args>
_process_request_simple(mcp_parser_t * pr,const int min,const int max)269 static int _process_request_simple(mcp_parser_t *pr, const int min, const int max) {
270     _process_tokenize(pr, max);
271     if (pr->ntokens < min) {
272         P_DEBUG("%s: not enough tokens for simple request: %d\n", __func__, pr->ntokens);
273         return -1;
274     }
275     pr->keytoken = 1; // second token is usually the key... stupid GAT.
276 
277     _process_request_key(pr);
278     return _process_request_noreply(pr);
279 }
280 
// Parse a raw request line in place and fill in the parser state.
//
// pr:      parser to fill. This function does not clear it: fields such as
//          ntokens, noreply and the meta flags are only written by the
//          per-command helpers, so callers zero it beforehand.
// command: start of the request line; pr->request points at it afterwards.
// cmdlen:  length of the line including its terminating "\r\n" or "\n".
//
// Returns 0 on success, with pr->command and pr->cmd_type set. Returns -1 if
// the line is shorter than 4 bytes, the command word is not recognized, or
// the per-command parser fails; command/cmd_type are not written then.
//
// TODO: return code ENUM with error types.
// FIXME: the mcp_parser_t bits have ended up being more fragile than I hoped.
// careful zero'ing is required. revisit?
// I think this mostly refers to recursive work (maybe just multiget?)
// Is a parser object run through process_request() twice, ever?
int process_request(mcp_parser_t *pr, const char *command, size_t cmdlen) {
    // we want to "parse in place" as much as possible, which allows us to
    // forward an unmodified request without having to rebuild it.

    const char *cm = command;
    size_t cl = 0;
    // min command length is 2, plus the "\r\n"
    if (cmdlen < 4) {
        return -1;
    }

    // Commands can end with bare '\n's. Depressingly I intended to be strict
    // with a \r\n requirement but never did this and need backcompat.
    // In this case we _know_ \n is at cmdlen because we can't enter this
    // function otherwise.
    // endlen is the length of the line without its terminator.
    if (cm[cmdlen-2] == '\r') {
        pr->endlen = cmdlen - 2;
    } else {
        pr->endlen = cmdlen - 1;
    }

    // cl: length of the command word (everything before the first space, or
    // the whole line if there is none).
    const char *s = memchr(command, ' ', pr->endlen);
    if (s != NULL) {
        cl = s - command;
    } else {
        cl = pr->endlen;
    }
    // These must be in place before any helper below runs: the helpers read
    // pr->request and pr->endlen.
    pr->keytoken = 0;
    pr->has_space = false;
    pr->parsed = cl;
    pr->request = command;
    pr->reqlen = cmdlen;
    int token_max = PARSER_MAX_TOKENS;

    int cmd = -1;
    int type = CMD_TYPE_GENERIC;
    int ret = 0;

    // Dispatch on the length of the command word first, then on its bytes.
    switch (cl) {
        case 0:
        case 1:
            // falls through with cmd as -1. should error.
            break;
        case 2:
            // two letter commands starting with 'm' are meta commands.
            if (cm[0] == 'm') {
                type = CMD_TYPE_META;
                switch (cm[1]) {
                    case 'g':
                        cmd = CMD_MG;
                        ret = _process_request_meta(pr);
                        break;
                    case 's':
                        cmd = CMD_MS;
                        ret = _process_request_mset(pr);
                        break;
                    case 'd':
                        cmd = CMD_MD;
                        ret = _process_request_meta(pr);
                        break;
                    case 'n':
                        // TODO: do we route/handle NOP's at all?
                        // they should simply reflect to the client.
                        // Note: no tokenizing happens for this command.
                        cmd = CMD_MN;
                        break;
                    case 'a':
                        cmd = CMD_MA;
                        ret = _process_request_meta(pr);
                        break;
                    case 'e':
                        cmd = CMD_ME;
                        // TODO: not much special processing here; binary keys
                        ret = _process_request_meta(pr);
                        break;
                }
            }
            break;
        case 3:
            if (cm[0] == 'g') {
                if (cm[1] == 'e' && cm[2] == 't') {
                    cmd = CMD_GET;
                    type = CMD_TYPE_GET;
                    token_max = 2; // don't chew through multigets.
                    ret = _process_request_simple(pr, 2, 2);
                }
                if (cm[1] == 'a' && cm[2] == 't') {
                    type = CMD_TYPE_GET;
                    cmd = CMD_GAT;
                    token_max = 2; // don't chew through multigets.
                    ret = _process_request_gat(pr);
                }
            } else if (cm[0] == 's' && cm[1] == 'e' && cm[2] == 't') {
                cmd = CMD_SET;
                ret = _process_request_storage(pr, token_max);
            } else if (cm[0] == 'a' && cm[1] == 'd' && cm[2] == 'd') {
                cmd = CMD_ADD;
                ret = _process_request_storage(pr, token_max);
            } else if (cm[0] == 'c' && cm[1] == 'a' && cm[2] == 's') {
                cmd = CMD_CAS;
                ret = _process_request_storage(pr, token_max);
            }
            break;
        case 4:
            if (strncmp(cm, "gets", 4) == 0) {
                cmd = CMD_GETS;
                type = CMD_TYPE_GET;
                token_max = 2; // don't chew through multigets.
                ret = _process_request_simple(pr, 2, 2);
            } else if (strncmp(cm, "incr", 4) == 0) {
                cmd = CMD_INCR;
                ret = _process_request_simple(pr, 3, 4);
            } else if (strncmp(cm, "decr", 4) == 0) {
                cmd = CMD_DECR;
                ret = _process_request_simple(pr, 3, 4);
            } else if (strncmp(cm, "gats", 4) == 0) {
                cmd = CMD_GATS;
                type = CMD_TYPE_GET;
                ret = _process_request_gat(pr);
            } else if (strncmp(cm, "quit", 4) == 0) {
                // no tokenizing happens for this command.
                cmd = CMD_QUIT;
            }
            break;
        case 5:
            if (strncmp(cm, "touch", 5) == 0) {
                cmd = CMD_TOUCH;
                ret = _process_request_simple(pr, 3, 4);
            } else if (strncmp(cm, "stats", 5) == 0) {
                cmd = CMD_STATS;
                // Don't process a key; fetch via arguments.
                _process_tokenize(pr, token_max);
            } else if (strncmp(cm, "watch", 5) == 0) {
                cmd = CMD_WATCH;
                _process_tokenize(pr, token_max);
            }
            break;
        case 6:
            if (strncmp(cm, "delete", 6) == 0) {
                cmd = CMD_DELETE;
                ret = _process_request_simple(pr, 2, 4);
            } else if (strncmp(cm, "append", 6) == 0) {
                cmd = CMD_APPEND;
                ret = _process_request_storage(pr, token_max);
            }
            break;
        case 7:
            if (strncmp(cm, "replace", 7) == 0) {
                cmd = CMD_REPLACE;
                ret = _process_request_storage(pr, token_max);
            } else if (strncmp(cm, "prepend", 7) == 0) {
                cmd = CMD_PREPEND;
                ret = _process_request_storage(pr, token_max);
            } else if (strncmp(cm, "version", 7) == 0) {
                cmd = CMD_VERSION;
                _process_tokenize(pr, token_max);
            }
            break;
    }

    // Unknown command word, or the per-command parser rejected the line.
    // TODO: log more specific error code.
    if (cmd == -1 || ret != 0) {
        return -1;
    }

    pr->command = cmd;
    pr->cmd_type = type;

    return 0;
}
453 
454 // FIXME (v2): any reason to pass in command/cmdlen separately?
mcp_new_request(lua_State * L,mcp_parser_t * pr,const char * command,size_t cmdlen)455 mcp_request_t *mcp_new_request(lua_State *L, mcp_parser_t *pr, const char *command, size_t cmdlen) {
456     // reserving an upvalue for key.
457     mcp_request_t *rq = lua_newuserdatauv(L, sizeof(mcp_request_t) + MCP_REQUEST_MAXLEN + KEY_MAX_LENGTH, 1);
458     // TODO (v2): memset only the non-data part? as the rest gets memcpy'd
459     // over.
460     memset(rq, 0, sizeof(mcp_request_t));
461     memcpy(&rq->pr, pr, sizeof(*pr));
462 
463     memcpy(rq->request, command, cmdlen);
464     rq->pr.request = rq->request;
465     rq->pr.reqlen = cmdlen;
466 
467     luaL_getmetatable(L, "mcp.request");
468     lua_setmetatable(L, -2);
469 
470     // at this point we should know if we have to bounce through _nread to
471     // get item data or not.
472     return rq;
473 }
474 
475 // fill a preallocated request object.
mcp_set_request(mcp_parser_t * pr,mcp_request_t * rq,const char * command,size_t cmdlen)476 void mcp_set_request(mcp_parser_t *pr, mcp_request_t *rq, const char *command, size_t cmdlen) {
477     memset(rq, 0, sizeof(mcp_request_t));
478     memcpy(&rq->pr, pr, sizeof(*pr));
479 
480     memcpy(rq->request, command, cmdlen);
481     rq->pr.request = rq->request;
482     rq->pr.reqlen = cmdlen;
483 }
484 
485 // Replaces a token inside a request and re-parses.
486 // Note that this has some optimization opportunities. Delaying until
487 // required.
488 // We should not guarantee order when updating meta flags, which would allow
489 // blanking tokens and appending new ones.
490 // TODO (v2): much of the length is the key, avoid copying it.
mcp_request_render(mcp_request_t * rq,int idx,const char flag,const char * tok,size_t len)491 int mcp_request_render(mcp_request_t *rq, int idx, const char flag, const char *tok, size_t len) {
492     char temp[MCP_REQUEST_MAXLEN+1];
493     char *p = temp;
494     mcp_parser_t *pr = &rq->pr;
495 
496     if (pr->reqlen + len + 2 > MCP_REQUEST_MAXLEN) {
497         return -1;
498     }
499     // Cannot add/append tokens yet.
500     if (idx >= pr->ntokens) {
501         return -1;
502     }
503 
504     memcpy(p, pr->request, pr->tokens[idx]);
505     p += pr->tokens[idx];
506 
507     if (flag) {
508         *p = flag;
509         p++;
510     }
511     if (tok) {
512         memcpy(p, tok, len);
513         p += len;
514     }
515 
516     // Add a space and copy more tokens if there were more.
517     if (idx+1 < pr->ntokens) {
518         if (flag || len != 0) {
519             // Only pre-space if not deleting the token.
520             *p = ' ';
521             p++;
522         }
523         memcpy(p, &pr->request[pr->tokens[idx+1]], pr->tokens[pr->ntokens] - pr->tokens[idx+1]);
524         p += pr->tokens[pr->ntokens] - pr->tokens[idx+1];
525     } else {
526         // If we removed something from the end we might've left some spaces.
527         while (*(p-1) == ' ') {
528             p--;
529         }
530     }
531 
532     memcpy(p, "\r\n\0", 3);
533     p += 2;
534 
535     memcpy(rq->request, temp, p - temp);
536 
537     // Hold the vlen/vbuf and restore after re-parsing. Since we can only edit
538     // the command line, not the value here, we would otherwise allow sending
539     // arbitrary memory over the network if someone modifies a SET.
540     void *vbuf = pr->vbuf;
541     int vlen = pr->vlen;
542 
543     memset(pr, 0, sizeof(mcp_parser_t)); // TODO: required?
544     int ret = process_request(pr, rq->request, p - temp);
545     if (ret != 0) {
546         return ret;
547     }
548     pr->vbuf = vbuf;
549     pr->vlen = vlen;
550     return 0;
551 }
552 
// Appends a new token to the end of a request and re-parses.
//
// flag: optional single flag character written before `tok` (0 for none).
// tok:  optional token bytes (`len` long).
//
// The request must currently end in "\r\n" (asserted). Returns 0 on success,
// -1 if the result would be too long or no longer parses. The value
// buffer/length of the request are preserved in every case.
int mcp_request_append(mcp_request_t *rq, const char flag, const char *tok, size_t len) {
    mcp_parser_t *pr = &rq->pr;
    const char *start = pr->request;
    char *p = (char *)pr->request + pr->reqlen - 2; // start at the \r
    assert(*p == '\r');

    if (pr->reqlen + len + 2 > MCP_REQUEST_MAXLEN) {
        return -1;
    }

    // the old "\r" becomes the separator in front of the new token.
    *p = ' ';
    p++;

    if (flag) {
        *p = flag;
        p++;
    }
    if (tok) {
        memcpy(p, tok, len);
        p += len;
    }

    // The terminator is written for convenience but not counted in the
    // length.
    memcpy(p, "\r\n\0", 3);
    p += 2;

    // Hold the vlen/vbuf across the re-parse: only the command line is
    // edited here, never the value.
    void *vbuf = pr->vbuf;
    int vlen = pr->vlen;

    memset(pr, 0, sizeof(mcp_parser_t)); // TODO: required?
    int ret = process_request(pr, rq->request, p - start);

    // Restore even when the re-parse failed: the memset above dropped the
    // only reference to the value buffer held by this request.
    pr->vbuf = vbuf;
    pr->vlen = vlen;

    return ret;
}
592 
// Point the iovecs of a pending proxy IO at this request: the command line
// always, plus the value buffer when the request carries one.
void mcp_request_attach(mcp_request_t *rq, io_pending_proxy_t *p) {
    mcp_parser_t *pr = &rq->pr;
    const size_t reqlen = pr->reqlen;

    // The stringified request. This is also referencing into the coroutine
    // stack, which should be safe from gc.
    p->iov[0].iov_base = (char *) pr->request;
    p->iov[0].iov_len = reqlen;
    p->iovcnt = 1;
    p->iovbytes = reqlen;

    if (pr->vlen == 0) {
        // no value attached to this request.
        return;
    }

    p->iov[1].iov_base = pr->vbuf;
    p->iov[1].iov_len = pr->vlen;
    p->iovcnt = 2;
    p->iovbytes += pr->vlen;
}
611 
612 // second argument is optional, for building set requests.
613 // TODO: append the \r\n for the VAL?
mcplib_request(lua_State * L)614 int mcplib_request(lua_State *L) {
615     LIBEVENT_THREAD *t = PROXY_GET_THR(L);
616     size_t len = 0;
617     size_t vlen = 0;
618     mcp_parser_t pr = {0};
619     const char *cmd = luaL_checklstring(L, 1, &len);
620     const char *val = NULL;
621     int type = lua_type(L, 2);
622     if (type == LUA_TSTRING) {
623         val = luaL_optlstring(L, 2, NULL, &vlen);
624         if (vlen < 2 || memcmp(val+vlen-2, "\r\n", 2) != 0) {
625             proxy_lua_error(L, "value passed to mcp.request must end with \\r\\n");
626         }
627     } else if (type == LUA_TUSERDATA) {
628         // vlen for requests and responses include the "\r\n" already.
629         mcp_resp_t *r = luaL_testudata(L, 2, "mcp.response");
630         if (r != NULL) {
631             if (r->resp.value) {
632                 val = r->resp.value;
633                 vlen = r->resp.vlen_read; // paranoia, so we can't overread into memory.
634             }
635         } else {
636             mcp_request_t *rq = luaL_testudata(L, 2, "mcp.request");
637             if (rq->pr.vbuf) {
638                 val = rq->pr.vbuf;
639                 vlen = rq->pr.vlen;
640             }
641         }
642     }
643 
644     // FIXME (v2): if we inline the userdata we can avoid memcpy'ing the parser
645     // structure from the stack? but causes some code duplication.
646     if (process_request(&pr, cmd, len) != 0) {
647         proxy_lua_error(L, "failed to parse request");
648         return 0;
649     }
650     mcp_request_t *rq = mcp_new_request(L, &pr, cmd, len);
651 
652     if (val != NULL) {
653         rq->pr.vlen = vlen;
654         rq->pr.vbuf = malloc(vlen);
655         if (rq->pr.vbuf == NULL) {
656             // Note: without *c we can't tick the appropriate counter.
657             // However, in practice raw malloc's are nearly never going to
658             // fail.
659             // TODO(v2): we can stack values into the request objects or use
660             // the slabber memory, so this isn't necessary anyway.
661             proxy_lua_error(L, "failed to allocate value memory for request object");
662         }
663         memcpy(rq->pr.vbuf, val, vlen);
664         // Note: Not enforcing the memory limit here is deliberate:
665         // - if we're over the memory limit, it'll get caught very soon after
666         // this, but we won't be causing some lua to bail mid-flight, which is
667         // more graceful to the end user.
668         pthread_mutex_lock(&t->proxy_limit_lock);
669         t->proxy_buffer_memory_used += rq->pr.vlen;
670         pthread_mutex_unlock(&t->proxy_limit_lock);
671     }
672 
673     // rq is now created, parsed, and on the stack.
674     return 1;
675 }
676 
// Lua: pushes the key of the request on top of the stack as a string.
int mcplib_request_key(lua_State *L) {
    mcp_request_t *rq = luaL_checkudata(L, -1, "mcp.request");
    const char *key = MCP_PARSER_KEY(rq->pr);
    lua_pushlstring(L, key, rq->pr.klen);
    return 1;
}
682 
683 // NOTE: I've mixed up const/non-const strings in the request. During parsing
684 // we want it to be const, but after that's done the request is no longer
685 // const. It might be better to just remove the const higher up the chain, but
686 // I'd rather not. So for now these functions will be dumping the const to
687 // modify the string.
mcplib_request_ltrimkey(lua_State * L)688 int mcplib_request_ltrimkey(lua_State *L) {
689     mcp_request_t *rq = luaL_checkudata(L, -2, "mcp.request");
690     int totrim = luaL_checkinteger(L, -1);
691     char *key = (char *) MCP_PARSER_KEY(rq->pr);
692 
693     if (totrim > rq->pr.klen) {
694         proxy_lua_error(L, "ltrimkey cannot zero out key");
695         return 0;
696     } else {
697         memset(key, ' ', totrim);
698         rq->pr.klen -= totrim;
699         rq->pr.tokens[rq->pr.keytoken] += totrim;
700     }
701     return 1;
702 }
703 
// Lua: trims `totrim` bytes off the end of the key by overwriting them with
// spaces. Raises a Lua error when `totrim` is negative or longer than the
// key.
int mcplib_request_rtrimkey(lua_State *L) {
    mcp_request_t *rq = luaL_checkudata(L, -2, "mcp.request");
    int totrim = luaL_checkinteger(L, -1);
    char *key = (char *) MCP_PARSER_KEY(rq->pr);

    // Reject negative counts explicitly: memset() takes a size_t, so a
    // negative value would turn into an enormous length.
    if (totrim < 0 || totrim > rq->pr.klen) {
        proxy_lua_error(L, "rtrimkey cannot zero out key");
        return 0;
    } else {
        memset(key + (rq->pr.klen - totrim), ' ', totrim);
        rq->pr.klen -= totrim;
        // don't need to change the key token.
    }
    return 1;
}
719 
720 // Virtual table operations on the request.
mcplib_request_token(lua_State * L)721 int mcplib_request_token(lua_State *L) {
722     mcp_request_t *rq = luaL_checkudata(L, 1, "mcp.request");
723     int argc = lua_gettop(L);
724 
725     if (argc == 1) {
726         lua_pushnil(L);
727         return 1;
728     }
729 
730     int token = luaL_checkinteger(L, 2);
731 
732     if (token < 1 || token > rq->pr.ntokens) {
733         // maybe an error?
734         lua_pushnil(L);
735         return 1;
736     }
737 
738     size_t vlen = 0;
739     if (argc > 2) {
740         // overwriting a token.
741         size_t newlen = 0;
742         const char *newtok = lua_tolstring(L, 3, &newlen);
743         if (mcp_request_render(rq, token-1, 0, newtok, newlen) != 0) {
744             proxy_lua_error(L, "token(): request malformed after edit");
745             return 0;
746         }
747         return 0;
748     } else {
749         // fetching a token.
750         const char *start = rq->pr.request + rq->pr.tokens[token-1];
751         vlen = _process_token_len(&rq->pr, token-1);
752 
753         P_DEBUG("%s: pushing token of len: %lu\n", __func__, vlen);
754         lua_pushlstring(L, start, vlen);
755         return 1;
756     }
757 
758     return 0;
759 }
760 
761 // Fetch only.
mcplib_request_token_int(lua_State * L)762 int mcplib_request_token_int(lua_State *L) {
763     mcp_request_t *rq = luaL_checkudata(L, 1, "mcp.request");
764     int argc = lua_gettop(L);
765 
766     if (argc == 1) {
767         lua_pushnil(L);
768         return 1;
769     }
770 
771     int x = luaL_checkinteger(L, 2);
772 
773     if (x < 1 || x > rq->pr.ntokens) {
774         // maybe an error?
775         lua_pushnil(L);
776         return 1;
777     }
778 
779     size_t vlen = 0;
780     // fetching a token.
781     const char *s = rq->pr.request + rq->pr.tokens[x-1];
782     vlen = _process_token_len(&rq->pr, x-1);
783     // do a funny dance to safely strtol the token.
784     // TODO: use tokenizer based tokto when merged.
785     char temp[22];
786     int tocopy = vlen > 22 ? 21 : vlen;
787     memcpy(temp, s, tocopy);
788     temp[vlen] = '\0';
789     int64_t token = 0;
790     if (safe_strtoll(temp, &token)) {
791         lua_pushinteger(L, token);
792     } else {
793         lua_pushnil(L);
794     }
795 
796     return 1;
797 }
798 
// Lua: pushes the number of indexed tokens in the request.
int mcplib_request_ntokens(lua_State *L) {
    const mcp_request_t *req = luaL_checkudata(L, 1, "mcp.request");

    lua_pushinteger(L, req->pr.ntokens);
    return 1;
}
804 
// Lua: pushes the parsed command code of the request.
int mcplib_request_command(lua_State *L) {
    const mcp_request_t *req = luaL_checkudata(L, -1, "mcp.request");

    lua_pushinteger(L, req->pr.command);
    return 1;
}
810 
// Lua: req:has_flag("F") -> bool
// Tests the flag bitfield of the request; raises a Lua error unless the
// argument is a single character in the accepted range.
int mcplib_request_has_flag(lua_State *L) {
    mcp_request_t *rq = luaL_checkudata(L, 1, "mcp.request");
    size_t flaglen = 0;
    const char *flagstr = luaL_checklstring(L, 2, &flaglen);
    if (flaglen != 1) {
        proxy_lua_error(L, "has_flag(): meta flag must be a single character");
        return 0;
    }

    const char flag = flagstr[0];
    if (flag < 'A' || flag > 'z') {
        proxy_lua_error(L, "has_flag(): invalid flag, must be A-Z,a-z");
        return 0;
    }

    // each flag character maps to one bit, counted from 'A'.
    const uint64_t mask = (uint64_t)1 << (flag - 'A');
    lua_pushboolean(L, (rq->pr.t.meta.flags & mask) != 0);

    return 1;
}
832 
833 // req:flag_token("F") -> (bool, nil|token)
834 // req:flag_token("O", "Onewopauqe") -> (bool, oldtoken)
//
// Lua binding for req:flag_token(flag [, replacement]).
//   arg 1: "mcp.request" userdata.
//   arg 2: the meta flag, a one-character string.
//   arg 3: optional string; when given and the flag is found, the flag's
//          token slot is re-rendered with it via mcp_request_render().
// Pushes true/false for whether the flag's bit is set on the request. When
// the flag is located and carries a token, the token text (without the
// leading flag character) is pushed as a second result.
// Invalid arguments raise a Lua error through proxy_lua_error().
int mcplib_request_flag_token(lua_State *L) {
    mcp_request_t *rq = luaL_checkudata(L, 1, "mcp.request");
    size_t len = 0;
    const char *flagstr = luaL_checklstring(L, 2, &len);
    if (len != 1) {
        proxy_lua_error(L, "flag_token(): meta flag must be a single character");
        return 0;
    }

    const char flag = flagstr[0];
    // NOTE(review): 65..122 is 'A'..'z' in ASCII, which also admits the six
    // characters between 'Z' and 'a'. Left as-is to stay in step with the
    // other flag accessors in this file.
    if (flag < 65 || flag > 122) {
        proxy_lua_error(L, "flag_token(): invalid flag, must be A-Z,a-z");
        return 0;
    }

    // overwriting a flag/token with the third argument.
    const bool replace = lua_isstring(L, 3);
    const uint64_t flagbit = (uint64_t)1 << (flag - 65);

    if (!(rq->pr.t.meta.flags & flagbit)) {
        lua_pushboolean(L, 0);
        return 1;
    }

    // The flag definitely exists, but sadly we need to scan for the
    // actual flag to see if it has a token.
    lua_pushboolean(L, 1);
    int ret = 1;
    for (int x = rq->pr.keytoken+1; x < rq->pr.ntokens; x++) {
        const char *s = rq->pr.request + rq->pr.tokens[x];
        if (s[0] != flag) {
            continue;
        }

        size_t vlen = _process_token_len(&rq->pr, x);
        if (vlen > 1) {
            // strip the flag off the token and return. This is pushed
            // before any edit below touches the request.
            lua_pushlstring(L, s+1, vlen-1);
            ret = 2;
        }

        // Have something to replace the flag/token with.
        if (replace) {
            size_t newlen = 0;
            const char *newtok = lua_tolstring(L, 3, &newlen);
            if (mcp_request_render(rq, x, 0, newtok, newlen) != 0) {
                proxy_lua_error(L, "flag_token(): request malformed after edit");
                return 0;
            }
        }
        // only the first occurrence of the flag is considered.
        break;
    }

    return ret;
}
887 
// returns bool, int
// the bool reports whether the flag exists in the request
// the int is only returned when the flag carries a token; it is nil if the
// token cannot be converted to an integer
//
// Lua binding that reads a meta flag's token as an integer.
//   arg 1: "mcp.request" userdata.
//   arg 2: the meta flag, a one-character string.
// Pushes true/false for whether the flag's bit is set on the request. When
// the flag is located and carries a token, a second value is pushed: the
// integer, or nil if safe_strtoll() rejects the text.
// Invalid arguments raise a Lua error through proxy_lua_error().
int mcplib_request_flag_token_int(lua_State *L) {
    mcp_request_t *rq = luaL_checkudata(L, 1, "mcp.request");
    size_t len = 0;
    const char *flagstr = luaL_checklstring(L, 2, &len);
    if (len != 1) {
        proxy_lua_error(L, "flag_token_int(): meta flag must be a single character");
        return 0;
    }

    const char flag = flagstr[0];
    if (flag < 65 || flag > 122) {
        proxy_lua_error(L, "flag_token_int(): invalid flag, must be A-Z,a-z");
        return 0;
    }

    const uint64_t flagbit = (uint64_t)1 << (flag - 65);
    if (!(rq->pr.t.meta.flags & flagbit)) {
        lua_pushboolean(L, 0);
        return 1;
    }

    lua_pushboolean(L, 1);
    int ret = 1;
    for (int x = rq->pr.keytoken+1; x < rq->pr.ntokens; x++) {
        const char *s = rq->pr.request + rq->pr.tokens[x];
        if (s[0] != flag) {
            continue;
        }

        size_t vlen = _process_token_len(&rq->pr, x);
        if (vlen > 1) {
            // The token is not NUL terminated inside the request, so copy
            // it into a bounded scratch buffer before converting.
            // TODO: use tokenizer based tokto when merged.
            char temp[22];
            size_t tocopy = vlen - 1; // token bytes, without the flag char
            if (tocopy > sizeof(temp) - 1) {
                // over-long tokens are truncated to fit the buffer.
                tocopy = sizeof(temp) - 1;
            }
            memcpy(temp, s+1, tocopy);
            // Terminate at the number of bytes actually copied. Indexing
            // by vlen-1 wrote past the buffer for tokens over 22 bytes.
            temp[tocopy] = '\0';

            int64_t token = 0;
            if (safe_strtoll(temp, &token)) {
                lua_pushinteger(L, token);
            } else {
                lua_pushnil(L);
            }
            ret = 2;
        }

        // only the first occurrence of the flag is considered.
        break;
    }

    return ret;
}
938 
939 // these functions take token as string or number
940 // if number, internally convert it to avoid creating garbage
//
// Reads the meta flag argument found at Lua stack index idx.
// The argument must be a string of exactly one byte whose value lies in
// 65..122; anything else is reported through proxy_lua_error().
// Returns the flag byte.
static inline char _mcp_request_get_arg_flag(lua_State *L, int idx) {
    size_t arglen = 0;
    const char *arg = luaL_checklstring(L, idx, &arglen);

    if (arglen != 1) {
        proxy_lua_error(L, "request: meta flag must be a single character");
        return 0;
    }

    const char c = arg[0];
    if (!(c >= 65 && c <= 122)) {
        proxy_lua_error(L, "request: invalid flag, must be A-Z,a-z");
        return 0;
    }

    return c;
}
956 
957 // *tostring must be large enough to hold a 64bit number as a string.
//
// Resolves the optional token argument at Lua stack index idx.
//   string   -> returns Lua's own buffer, *tlen set to its length.
//   integer  -> formatted into tostring with itoa_64(); no Lua string is
//               created. *tlen is the number of bytes written.
//   float    -> converted by lua_tolstring(), which is what every number
//               went through before the integer path became reachable.
//   none/nil -> returns NULL with *tlen == 0 (flag without a token).
// Any other type raises "request: invalid flag argument".
//
// The type is tested with lua_type() rather than lua_isstring(), because
// lua_isstring() is also true for numbers and would shadow the integer path.
static inline const char * _mcp_request_check_flag_token(lua_State *L, int idx, char *tostring, size_t *tlen) {
    *tlen = 0;

    switch (lua_type(L, idx)) {
        case LUA_TSTRING:
            return lua_tolstring(L, idx, tlen);
        case LUA_TNUMBER:
            if (lua_isinteger(L, idx)) {
                lua_Integer n = lua_tointegerx(L, idx, NULL);
                char *end = itoa_64(n, tostring);
                *tlen = end - tostring;
                return tostring;
            }
            // non-integer number: let Lua produce its usual text form.
            return lua_tolstring(L, idx, tlen);
        case LUA_TNONE:
        case LUA_TNIL:
            // no token, just add the flag.
            return NULL;
        default:
            proxy_lua_error(L, "request: invalid flag argument");
            return NULL;
    }
}
983 
984 // req:flag_add("F", token) -> (bool)
985 // if token is "example", appends "Fexample" to request
//
// Lua binding for req:flag_add(flag [, token]).
// Pushes false without touching the request when the flag's bit is already
// set. Otherwise hands the flag and optional token to mcp_request_append()
// and pushes true if that returned 0, false if not.
int mcplib_request_flag_add(lua_State *L) {
    mcp_request_t *rq = luaL_checkudata(L, 1, "mcp.request");
    char flag = _mcp_request_get_arg_flag(L, 2);
    char numbuf[30]; // scratch space for a numeric token rendered as text

    const uint64_t mask = (uint64_t)1 << (flag - 65);
    if ((rq->pr.t.meta.flags & mask) != 0) {
        // fail, flag already exists.
        lua_pushboolean(L, 0);
        return 1;
    }

    size_t toklen = 0;
    const char *tok = _mcp_request_check_flag_token(L, 3, numbuf, &toklen);

    int appended = (mcp_request_append(rq, flag, tok, toklen) == 0);
    lua_pushboolean(L, appended);
    return 1;
}
1009 
// req:flag_set("F", token) -> (bool) [overwrites if exists]
// if token is "example", an existing "F" flag is re-rendered as "Fexample";
// otherwise "Fexample" is appended to the request
//
// Lua binding for req:flag_set(flag [, token]).
// If the flag is found in the request its token slot is re-rendered with
// mcp_request_render(); if not, the flag is added with mcp_request_append().
// Pushes true when the chosen call returned 0, false otherwise.
int mcplib_request_flag_set(lua_State *L) {
    mcp_request_t *rq = luaL_checkudata(L, 1, "mcp.request");
    char flag = _mcp_request_get_arg_flag(L, 2);
    char numbuf[30]; // scratch space for a numeric token rendered as text

    int idx = mcp_request_find_flag_index(rq, flag);
    size_t toklen = 0;
    const char *tok = _mcp_request_check_flag_token(L, 3, numbuf, &toklen);

    int ok;
    if (idx > 0) {
        // TODO: do nothing if:
        // flag exists in request, without token, and we're not setting a
        // token.
        ok = (mcp_request_render(rq, idx, flag, tok, toklen) == 0);
    } else {
        ok = (mcp_request_append(rq, flag, tok, toklen) == 0);
    }

    lua_pushboolean(L, ok);
    return 1;
}
1039 
// allows replacing a flag with a different flag
// req:flag_replace("F", "N", token) -> (bool)
// if token is "example", an existing "F" flag is re-rendered as "Nexample";
// when "F" is not in the request, "Nexample" is appended instead
//
// Lua binding for req:flag_replace(oldflag, newflag [, token]).
// Looks up oldflag in the request. If it is found, that slot is re-rendered
// with newflag and the token through mcp_request_render(); if not, newflag
// and the token are added with mcp_request_append().
// Pushes true when the chosen call returned 0, false otherwise.
int mcplib_request_flag_replace(lua_State *L) {
    mcp_request_t *rq = luaL_checkudata(L, 1, "mcp.request");
    char oldflag = _mcp_request_get_arg_flag(L, 2);
    char newflag = _mcp_request_get_arg_flag(L, 3);
    char numbuf[30]; // scratch space for a numeric token rendered as text

    int idx = mcp_request_find_flag_index(rq, oldflag);
    size_t toklen = 0;
    const char *tok = _mcp_request_check_flag_token(L, 4, numbuf, &toklen);

    int ok;
    if (idx > 0) {
        ok = (mcp_request_render(rq, idx, newflag, tok, toklen) == 0);
    } else {
        ok = (mcp_request_append(rq, newflag, tok, toklen) == 0);
    }

    lua_pushboolean(L, ok);
    return 1;
}
1068 
1069 // req:flag_del("F") -> (bool)
1070 // remove a flag if exists
//
// Lua binding for req:flag_del(flag).
// Pushes true only when the flag was found and mcp_request_render(), called
// with no flag and no token for that slot, returned 0. Pushes false when the
// flag is absent or the render call failed.
int mcplib_request_flag_del(lua_State *L) {
    mcp_request_t *rq = luaL_checkudata(L, 1, "mcp.request");
    char flag = _mcp_request_get_arg_flag(L, 2);

    int idx = mcp_request_find_flag_index(rq, flag);

    // && short-circuits: the render is attempted only when the flag was
    // located. Nothing there means nothing was deleted.
    int removed = (idx > 0) && (mcp_request_render(rq, idx, 0, NULL, 0) == 0);

    lua_pushboolean(L, removed);
    return 1;
}
1091 
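// local match, token = req:match_res(res)
// scans the response line for `k` or `O` tokens and compares each one with
// the request's key or `O` token respectively.
// returns true, nil if at least one was compared and all of them match
// returns false, res token if one does not match.
// returns false, nil if there was nothing to compare against.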
1096 //
// Arguments (Lua stack): 1 = "mcp.request" userdata, 2 = "mcp.response"
// userdata. Always pushes two values:
//   true,  nil    - at least one `k`/`O` token was compared, all matched
//   false, string - a compared token differed; string is the response's text
//   false, nil    - no `k`/`O` token in the response, the response line is
//                   empty, or the request has nothing to compare it with
int mcplib_request_match_res(lua_State *L) {
    mcp_request_t *rq = luaL_checkudata(L, 1, "mcp.request");
    mcp_resp_t *rs = luaL_checkudata(L, 2, "mcp.response");

    const char *opaque_token = NULL;
    size_t opaque_len = 0;
    mcmc_resp_t reresp;
    // The parser's return value is not inspected below, so start from a
    // zeroed struct: rline then reads as NULL if the parser leaves it alone.
    memset(&reresp, 0, sizeof(reresp));

    // requests all have keys. check for an opaque.
    mcp_request_find_flag_token(rq, 'O', &opaque_token, &opaque_len);
    mcmc_parse_buf(rs->buf, rs->blen, &reresp);

    // scan the response line for tokens, since we don't have a reciprocal API
    // yet. When we do this code will be replaced with a function call like
    // the above.
    const char *p = reresp.rline;
    if (p == NULL) {
        // happens if the result line is blank (ie; 'HD\r\n')
        lua_pushboolean(L, 0);
        lua_pushnil(L);
        return 2;
    }
    // Form the end pointer only once p is known to be non-NULL.
    const char *e = p + reresp.rlen;

    int matched = 0;
    while (p != e) {
        if (*p == ' ') {
            p++;
        } else if (*p == 'k' || *p == 'O') {
            const char *rq_token = NULL;
            size_t rq_len = 0;
            if (*p == 'k') {
                rq_token = MCP_PARSER_KEY(rq->pr);
                rq_len = rq->pr.klen;
            } else {
                rq_token = opaque_token;
                rq_len = opaque_len;
            }
            if (rq_token == NULL) {
                // response carries a token the request cannot answer for.
                lua_pushboolean(L, 0);
                lua_pushnil(L);
                return 2;
            }

            p++; // skip flag and start comparing token
            const char *rs_token = p;

            // find end of token. <ctype.h> functions need an unsigned char
            // value; a plain char may be negative for bytes >= 0x80.
            while (p != e && !isspace((unsigned char)*p)) {
                p++;
            }

            size_t rs_len = (size_t)(p - rs_token);
            if (rq_len != rs_len || memcmp(rq_token, rs_token, rs_len) != 0) {
                // FAIL, keys aren't the same length or don't match.
                lua_pushboolean(L, 0);
                lua_pushlstring(L, rs_token, rs_len);
                return 2;
            }
            matched = 1;
        } else {
            // skip token
            while (p != e && *p != ' ') {
                p++;
            }
        }
    }

    lua_pushboolean(L, matched);
    lua_pushnil(L);
    return 2;
}
1170 
// Frees the request's value buffer (rq->pr.vbuf) if one is attached, and
// subtracts rq->pr.vlen from the thread's proxy_buffer_memory_used counter
// while holding proxy_limit_lock. Does nothing when vbuf is already NULL, so
// it can be called more than once for the same request.
void mcp_request_cleanup(LIBEVENT_THREAD *t, mcp_request_t *rq) {
    // During nread c->item is the malloc'ed buffer. not yet put into
    // rq->buf - this gets freed because we've also set c->item_malloced if
    // the connection closes before finishing nread.
    if (rq->pr.vbuf == NULL) {
        return;
    }

    pthread_mutex_lock(&t->proxy_limit_lock);
    t->proxy_buffer_memory_used -= rq->pr.vlen;
    pthread_mutex_unlock(&t->proxy_limit_lock);

    free(rq->pr.vbuf);
    // need to ensure we NULL this out now, since we can call the cleanup
    // routine independent of GC, and a later GC would double-free.
    rq->pr.vbuf = NULL;
}
1185 
// Lua C function that runs mcp_request_cleanup() on the "mcp.request"
// userdata at the top of the stack. Presumably installed as the request
// object's __gc handler; the registration is outside this view.
// Returns no values to Lua.
int mcplib_request_gc(lua_State *L) {
    LIBEVENT_THREAD *thr = PROXY_GET_THR(L);
    mcp_request_t *req = luaL_checkudata(L, -1, "mcp.request");

    mcp_request_cleanup(thr, req);
    return 0;
}
1193 
// Locates the token index of meta flag `flag` within the request.
// The flag bitmask is consulted first so requests that do not carry the flag
// skip the scan entirely. Tokens after the key are then checked in order and
// the first one whose leading byte equals `flag` wins.
// Returns that token index, or -1 when the flag is not present.
static int _mcp_request_find_flag(mcp_request_t *rq, const char flag) {
    const uint64_t mask = (uint64_t)1 << (flag - 65);
    if ((rq->pr.t.meta.flags & mask) == 0) {
        return -1;
    }

    int idx = rq->pr.keytoken + 1;
    while (idx < rq->pr.ntokens) {
        const char *tok = rq->pr.request + rq->pr.tokens[idx];
        if (*tok == flag) {
            return idx;
        }
        idx++;
    }

    return -1;
}
1206 
// Non-static wrapper around _mcp_request_find_flag(): returns the token
// index of `flag` in the request, or -1 if the flag is not present.
int mcp_request_find_flag_index(mcp_request_t *rq, const char flag) {
    return _mcp_request_find_flag(rq, flag);
}
1211 
// Finds meta flag `flag` in the request and reports its token.
// On a hit (return value > 0):
//   *token points just past the flag character inside the request buffer,
//          or is NULL when the flag has no token attached;
//   *len   is the token length excluding the flag character.
// On a miss the return value is -1 and *token / *len are left untouched.
int mcp_request_find_flag_token(mcp_request_t *rq, const char flag, const char **token, size_t *len) {
    const int idx = _mcp_request_find_flag(rq, flag);
    if (idx <= 0) {
        return idx;
    }

    // length of the whole token, flag character included.
    const size_t full = _process_token_len(&rq->pr, idx);
    if (full > 1) {
        *token = rq->pr.request + rq->pr.tokens[idx] + 1;
    } else {
        *token = NULL;
    }
    *len = full - 1;

    return idx;
}
1225 
1226 // FIXME: temporary copypasta accessor until request objects can be moved to
1227 // mcmc tokenizer.
//
// Scans the tokens after the key for the first one starting with `flag` and
// converts the text following the flag character to a 64-bit integer.
// Unlike _mcp_request_find_flag() this does not consult the flag bitmask.
// Returns 0 and stores the value in *token on success. Returns -1 when the
// flag is missing, has no token, or safe_strtoll() rejects the text.
int mcp_request_find_flag_tokenint64(mcp_request_t *rq, const char flag, int64_t *token) {
    for (int x = rq->pr.keytoken+1; x < rq->pr.ntokens; x++) {
        const char *s = rq->pr.request + rq->pr.tokens[x];
        if (s[0] != flag) {
            continue;
        }

        size_t vlen = _process_token_len(&rq->pr, x);
        if (vlen <= 1) {
            // flag is present but carries no token; stop at first match.
            break;
        }

        // The token is not NUL terminated inside the request, so copy it
        // into a bounded scratch buffer before converting.
        char temp[22];
        size_t tocopy = vlen - 1; // token bytes, without the flag char
        if (tocopy > sizeof(temp) - 1) {
            // over-long tokens are truncated to fit the buffer.
            tocopy = sizeof(temp) - 1;
        }
        memcpy(temp, s+1, tocopy);
        // Terminate at the number of bytes actually copied. Indexing by
        // vlen-1 wrote past the buffer for tokens over 22 bytes.
        temp[tocopy] = '\0';

        return safe_strtoll(temp, token) ? 0 : -1;
    }

    return -1;
}
1252 
1253 // TODO (v2): check what lua does when it calls a function with a string argument
1254 // stored from a table/similar (ie; the prefix check code).
1255 // If it's not copying anything, we can add request-side functions to do most
1256 // forms of matching and avoid copying the key to lua space.
1257