1

网上找的base64解码算法,中间停止解码,不知道是不是字符串太大,有592个字符,从base64解码到ASCII的字符串大小有限制.

预期结果:

Lorem ipsum dolor sit amet,consectetur adipiscing elit,sed do eiusmod tempor incididunt ut labore et dolore magna aliqua。Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat。Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur。Exceptioneur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est labourum

实际结果:

Lorem ipsum dolor sit amet,consectetur adipiscing elit,sed do eiusmod tempor incididunt ut labore et dolore magna aliqua。Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat。杜伊斯·奥特·伊鲁

unsigned char *base64_decode(const char *data, size_t input_length, size_t *output_length) {

  unsigned char *decoded_data;
  int i, j = 0;

    if (decoding_table == NULL) build_decoding_table();

    if (input_length % 4 != 0) return NULL;

    *output_length = input_length / 4 * 3;

    if (data[input_length - 1] == '=') (*output_length)--;
    if (data[input_length - 2] == '=') (*output_length)--;

    decoded_data = (unsigned char*)malloc(*output_length +1);

    if (decoded_data == NULL) return NULL;

    for (i = 0, j = 0; i < input_length;) {

        uint32_t sextet_a = data[i] == '=' ? 0 & i++ : decoding_table[data[i++]];
        uint32_t sextet_b = data[i] == '=' ? 0 & i++ : decoding_table[data[i++]];
        uint32_t sextet_c = data[i] == '=' ? 0 & i++ : decoding_table[data[i++]];
        uint32_t sextet_d = data[i] == '=' ? 0 & i++ : decoding_table[data[i++]];

        uint32_t triple = (sextet_a << 3 * 6)
        + (sextet_b << 2 * 6)
        + (sextet_c << 1 * 6)
        + (sextet_d << 0 * 6);

        if (j < *output_length) decoded_data[j++] = (triple >> 2 * 8) & 0xFF;
        if (j < *output_length) decoded_data[j++] = (triple >> 1 * 8) & 0xFF;
        if (j < *output_length) decoded_data[j++] = (triple >> 0 * 8) & 0xFF;
    }

    return decoded_data;
}

int main(int argc, _TCHAR* argv[]){

    const char* data64 = "TG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdCwgc2VkIGRvIGVpdXNtb2QgdGVtcG9yIGluY2lkaWR1bnQgdXQgbGFib3JlIGV0IGRvbG9yZSBtYWduYSBhbGlxdWEuIFV0IGVuaW0gYWQgbWluaW0gdmVuaWFtLCBxdWlzIG5vc3RydWQgZXhlcmNpdGF0aW9uIHVsbGFtY28gbGFib3JpcyBuaXNpIHV0IGFsaXF1aXAgZXggZWEgY29tbW9kbyBjb25zZXF1YXQuIER1aXMgYXV0ZSBpcnUAZSBkb2xvciBpbiByZXByZWhlbmRlcml0IGluIHZvbHVwdGF0ZSB2ZWxpdCBlc3NlIGNpbGx1bSBkb2xvcmUgZXUgZnVnaWF0IG51bGxhIHBhcmlhdHVyLiBFeGNlcHRldXIgc2ludCBvY2NhZWNhdCBjdXBpZGF0YXQgbm9uIHByb2lkZW50LCBzdW50IGluIGN1bHBhIHF1aSBvZmZpY2lhIGRlc2VydW50IG1vbGxpdCBhbmltIGlkIGVzdCBsYWJvcnVt";
    size_t input_length = strlen(data64);
    size_t output_length;

    char * output = (char *) base64_decode(data64, input_length, &output_length);
    char * result = (char*)calloc(output_length, sizeof(char));
    strncpy(result, output, output_length);
    result[output_length] = 0;

    printf(result);
    printf("\n");
    base64_cleanup();
    system("pause");
    return 0;

}
4

1 回答 1

0

问题是,您的编码字符串包含一个零字节!看:

echo TG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdCwgc2VkIGRvIGVpdXNtb2QgdGVtcG9yIGluY2lkaWR1bnQgdXQgbGFib3JlIGV0IGRvbG9yZSBtYWduYSBhbGlxdWEuIFV0IGVuaW0gYWQgbWluaW0gdmVuaWFtLCBxdWlzIG5vc3RydWQgZXhlcmNpdGF0aW9uIHVsbGFtY28gbGFib3JpcyBuaXNpIHV0IGFsaXF1aXAgZXggZWEgY29tbW9kbyBjb25zZXF1YXQuIER1aXMgYXV0ZSBpcnUAZSBkb2xvciBpbiByZXByZWhlbmRlcml0IGluIHZvbHVwdGF0ZSB2ZWxpdCBlc3NlIGNpbGx1bSBkb2xvcmUgZXUgZnVnaWF0IG51bGxhIHBhcmlhdHVyLiBFeGNlcHRldXIgc2ludCBvY2NhZWNhdCBjdXBpZGF0YXQgbm9uIHByb2lkZW50LCBzdW50IGluIGN1bHBhIHF1aSBvZmZpY2lhIGRlc2VydW50IG1vbGxpdCBhbmltIGlkIGVzdCBsYWJvcnVt
     | base64 -D | xxd 
00000000: 4c6f 7265 6d20 6970 7375 6d20 646f 6c6f  Lorem ipsum dolo
00000010: 7220 7369 7420 616d 6574 2c20 636f 6e73  r sit amet, cons
00000020: 6563 7465 7475 7220 6164 6970 6973 6369  ectetur adipisci
00000030: 6e67 2065 6c69 742c 2073 6564 2064 6f20  ng elit, sed do 
00000040: 6569 7573 6d6f 6420 7465 6d70 6f72 2069  eiusmod tempor i
00000050: 6e63 6964 6964 756e 7420 7574 206c 6162  ncididunt ut lab
00000060: 6f72 6520 6574 2064 6f6c 6f72 6520 6d61  ore et dolore ma
00000070: 676e 6120 616c 6971 7561 2e20 5574 2065  gna aliqua. Ut e
00000080: 6e69 6d20 6164 206d 696e 696d 2076 656e  nim ad minim ven
00000090: 6961 6d2c 2071 7569 7320 6e6f 7374 7275  iam, quis nostru
000000a0: 6420 6578 6572 6369 7461 7469 6f6e 2075  d exercitation u
000000b0: 6c6c 616d 636f 206c 6162 6f72 6973 206e  llamco laboris n
000000c0: 6973 6920 7574 2061 6c69 7175 6970 2065  isi ut aliquip e
000000d0: 7820 6561 2063 6f6d 6d6f 646f 2063 6f6e  x ea commodo con
000000e0: 7365 7175 6174 2e20 4475 6973 2061 7574  sequat. Duis aut
000000f0: 6520 6972 7500 6520 646f 6c6f 7220 696e  e iru.e dolor in
00000100: 2072 6570 7265 6865 6e64 6572 6974 2069   reprehenderit i
00000110: 6e20 766f 6c75 7074 6174 6520 7665 6c69  n voluptate veli
00000120: 7420 6573 7365 2063 696c 6c75 6d20 646f  t esse cillum do
00000130: 6c6f 7265 2065 7520 6675 6769 6174 206e  lore eu fugiat n
00000140: 756c 6c61 2070 6172 6961 7475 722e 2045  ulla pariatur. E
00000150: 7863 6570 7465 7572 2073 696e 7420 6f63  xcepteur sint oc
00000160: 6361 6563 6174 2063 7570 6964 6174 6174  caecat cupidatat
00000170: 206e 6f6e 2070 726f 6964 656e 742c 2073   non proident, s
00000180: 756e 7420 696e 2063 756c 7061 2071 7569  unt in culpa qui
00000190: 206f 6666 6963 6961 2064 6573 6572 756e   officia deserun
000001a0: 7420 6d6f 6c6c 6974 2061 6e69 6d20 6964  t mollit anim id
000001b0: 2065 7374 206c 6162 6f72 756d             est laborum

在这一行:

000000f0: 6520 6972 7500 6520 646f 6c6f 7220 696e  e iru.e dolor in

第六个字节是零字节,输出printf()停止在那里。

如果你执行

fwrite(result, output_length, 1, stdout);

您将看到整个文本输出。

顺便提一句:

printf(result);

在任何情况下都应该避免,因为如果字符串result是从不受信任的来源生成的,那么在最好的情况下,嵌入式格式说明符可能会破坏输出,或者在最坏的情况下导致恶意代码执行。更好的使用:

printf("%s", result);

或者

fputs(stdout, result); // as supercat suggested ;)

但当然不是在这种情况下,因为它们也会在零字节处停止。

于 2019-02-18T18:24:10.307 回答