0

我有一个特定的用例,我找不到一个例子,到目前为止,尝试将不同的例子组合在一起对我来说并没有太大的收获。

我在 AWS 上的 Lambda 中使用 nodeJS 函数

我有一个缓冲区形式的 zipfile(只需使用 getObject 从 S3 读取)。zip 永远不会超过 10mb,因此可以在内存中处理它,从而避免使用流,并且希望避免使用本地 tmp 目录。

每个 zip 都包含一些文件和文件夹,我想获取名为“src/”的文件夹中的所有文件。这对所有 zip 文件都是一样的。这些文件不需要解压出来,我想将它们直接放入一个不含“src”子文件夹的新 zip —— 即“src”中的文件和文件夹直接位于新 zip 的根目录,原先位于 src/ 之内的任何子文件夹应保留其层次结构。

最终结果应该是输出 zip 的缓冲区。

任何帮助将非常感激。

4

1 回答 1

0

我自己可能已经解决了它,但是我仍然愿意对此进行任何改进,因为它在解压缩然后再次压缩时感觉有点笨拙和效率低下。

const yauzl = require("yauzl");
const yazl = require("yazl");

/**
 * Create an S3 upload fed by a PassThrough stream.
 *
 * @param {{Bucket: string, Key: string}} target - destination bucket/key.
 * @returns {{writeStream: stream.PassThrough, promise: Promise}} the stream
 *   to pipe data into, and the promise that settles when the upload finishes.
 */
const uploadStream = ({ Bucket, Key }) => {
    const s3 = new AWS.S3();
    const passThrough = new stream.PassThrough();
    // s3.upload consumes the PassThrough as the object body
    const promise = s3.upload({ Bucket, Key, Body: passThrough }).promise();
    return { writeStream: passThrough, promise };
  }


  // retrieve the artifact
  return s3.getObject({Bucket: bucket, Key: key}).promise().then(res => {

    // unzip the artifact
    return new Promise(function(resolve, reject){

      // new zip that will contain only src
      const newzip = new yazl.ZipFile();

      // read the zip from buffer (entire zip, this cannot be streamed)
      yauzl.fromBuffer(res.Body, {lazyEntries: true}, (err, zip) => {
        if(err) {
          console.log("Error accessing artifact: ",err);
          return reject("Error accessing artifact");
        }

        // read each item in the zip
        zip.readEntry();
        zip.on("entry", function(entry){
          // we only want files in the src dir, skip others
          if(entry.fileName.substr(0,3) !== "src") return zip.readEntry();

          // extract file
          zip.openReadStream(entry, {decompress: entry.isCompressed() ? true : null}, function (err, readStream) {
            if(err){
              zip.close();
              console.log("Failed to read file in artifact: ", err);
              return reject("Failed to read file in artifact");
            }

            // collect data into buffer
            let buffer = null;
            readStream.on('data', function(d) {
              if(!buffer){
                buffer = d;
              }else{
                buffer = Buffer.concat([buffer, d]);
              }
            });

            // file data collection completed
            readStream.on('end', function () {

              // add it to the new zip (without the src dir in the path)
              newzip.addBuffer(buffer,entry.fileName.substr(4));

              // continue to next entry
              zip.readEntry();
            });

            // fail on error
            readStream.on('error', function (err) {
              zip.close();
              console.log("Failed to extract file from artifact: ", err);
              return reject("Failed to extract file from artifact");
            });
          });

        });

        // all items processed
        zip.on("end", function(){
          console.log("Completed extracting all files");

          // all files added
          newzip.end();

          // store on s3
          const { writeStream, promise } = uploadStream({Bucket: bucket, Key: key+"Deploy"});
          newzip.outputStream.pipe(writeStream).on("close", function(){
            resolve({result:true,artifact:key+"Deploy"});
          });

        });

      });

    }).catch(err => {
      console.log("Unzip artifact error: ",err);
      return Promise.reject("Could not unzip artifact");
    });


  }).catch(err => {
    console.log("Retrieve artifact error: ",err);
    return Promise.reject("Could not retrieve artifact");
  });
于 2021-01-21T15:10:58.677 回答