From 8cc35e2c13cb2385e0b2d7a0ee926b56ef757379 Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Sat, 1 Aug 2020 11:18:34 -0700 Subject: fix wrong decompression logic --- other/java/client/src/main/java/seaweedfs/client/SeaweedRead.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'other/java/client/src/main') diff --git a/other/java/client/src/main/java/seaweedfs/client/SeaweedRead.java b/other/java/client/src/main/java/seaweedfs/client/SeaweedRead.java index 05457ed48..48d942f88 100644 --- a/other/java/client/src/main/java/seaweedfs/client/SeaweedRead.java +++ b/other/java/client/src/main/java/seaweedfs/client/SeaweedRead.java @@ -97,7 +97,7 @@ public class SeaweedRead { } if (chunkView.isCompressed) { - data = Gzip.decompress(data); + // data = Gzip.decompress(data); } if (chunkView.cipherKey != null && chunkView.cipherKey.length != 0) { -- cgit v1.2.3 From 4d9da157bc4794524855f455d9df55c73f0dbdb0 Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Sat, 1 Aug 2020 11:36:29 -0700 Subject: HDFS: read gzip content --- .../src/main/java/seaweedfs/client/SeaweedRead.java | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) (limited to 'other/java/client/src/main') diff --git a/other/java/client/src/main/java/seaweedfs/client/SeaweedRead.java b/other/java/client/src/main/java/seaweedfs/client/SeaweedRead.java index 48d942f88..9edbfb799 100644 --- a/other/java/client/src/main/java/seaweedfs/client/SeaweedRead.java +++ b/other/java/client/src/main/java/seaweedfs/client/SeaweedRead.java @@ -2,6 +2,7 @@ package seaweedfs.client; import org.apache.http.HttpEntity; import org.apache.http.HttpHeaders; +import org.apache.http.client.entity.GzipDecompressingEntity; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; import org.apache.http.util.EntityUtils; @@ -78,7 +79,7 @@ public class SeaweedRead { HttpGet request = new HttpGet( String.format("http://%s/%s", locations.getLocations(0).getUrl(), chunkView.fileId)); - request.setHeader(HttpHeaders.ACCEPT_ENCODING, ""); + request.setHeader(HttpHeaders.ACCEPT_ENCODING, "gzip"); byte[] data = null; @@ -87,6 +88,18 @@ public class SeaweedRead { try { HttpEntity entity = response.getEntity(); + Header contentEncodingHeader = entity.getContentEncoding(); + + if (contentEncodingHeader != null) { + HeaderElement[] encodings =contentEncodingHeader.getElements(); + for (int i = 0; i < encodings.length; i++) { + if (encodings[i].getName().equalsIgnoreCase("gzip")) { + entity = new GzipDecompressingEntity(entity); + break; + } + } + } + data = EntityUtils.toByteArray(entity); EntityUtils.consume(entity); @@ -96,10 +109,6 @@ public class SeaweedRead { request.releaseConnection(); } - if (chunkView.isCompressed) { - // data = Gzip.decompress(data); - } - if (chunkView.cipherKey != null && chunkView.cipherKey.length != 0) { try { data = SeaweedCipher.decrypt(data, chunkView.cipherKey); -- cgit v1.2.3 From e1f070a9a1699578de4f1513f62453c7a490514f Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Sat, 1 Aug 2020 12:42:41 -0700 Subject: Hadoop: 1.4.4 --- other/java/client/src/main/java/seaweedfs/client/SeaweedRead.java | 2 ++ 1 file changed, 2 insertions(+) (limited to 'other/java/client/src/main') diff --git a/other/java/client/src/main/java/seaweedfs/client/SeaweedRead.java b/other/java/client/src/main/java/seaweedfs/client/SeaweedRead.java index 9edbfb799..fa44ee4af 100644 --- a/other/java/client/src/main/java/seaweedfs/client/SeaweedRead.java +++ b/other/java/client/src/main/java/seaweedfs/client/SeaweedRead.java @@ -1,5 +1,7 @@ package seaweedfs.client; +import org.apache.http.Header; +import org.apache.http.HeaderElement; import org.apache.http.HttpEntity; import org.apache.http.HttpHeaders; import org.apache.http.client.entity.GzipDecompressingEntity; -- cgit v1.2.3 From 0ed1f43d29921180f6de28148379dba3063c4109 Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Sun, 2 Aug 2020 23:50:23 -0700 Subject: decompress after decrypt if necessary skip any decompress error --- other/java/client/src/main/java/seaweedfs/client/Gzip.java | 14 +++++++++----- .../client/src/main/java/seaweedfs/client/SeaweedRead.java | 4 ++++ 2 files changed, 13 insertions(+), 5 deletions(-) (limited to 'other/java/client/src/main') diff --git a/other/java/client/src/main/java/seaweedfs/client/Gzip.java b/other/java/client/src/main/java/seaweedfs/client/Gzip.java index 248285dd3..4909094f5 100644 --- a/other/java/client/src/main/java/seaweedfs/client/Gzip.java +++ b/other/java/client/src/main/java/seaweedfs/client/Gzip.java @@ -18,14 +18,18 @@ public class Gzip { return compressed; } - public static byte[] decompress(byte[] compressed) throws IOException { - ByteArrayInputStream bis = new ByteArrayInputStream(compressed); - GZIPInputStream gis = new GZIPInputStream(bis); - return readAll(gis); + public static byte[] decompress(byte[] compressed) { + try { + ByteArrayInputStream bis = new ByteArrayInputStream(compressed); + GZIPInputStream gis = new GZIPInputStream(bis); + return readAll(gis); + } catch (Exception e) { + return compressed; + } } private static byte[] readAll(InputStream input) throws IOException { - try( ByteArrayOutputStream output = new ByteArrayOutputStream()){ + try (ByteArrayOutputStream output = new ByteArrayOutputStream()) { byte[] buffer = new byte[4096]; int n; while (-1 != (n = input.read(buffer))) { diff --git a/other/java/client/src/main/java/seaweedfs/client/SeaweedRead.java b/other/java/client/src/main/java/seaweedfs/client/SeaweedRead.java index fa44ee4af..cd2f55678 100644 --- a/other/java/client/src/main/java/seaweedfs/client/SeaweedRead.java +++ b/other/java/client/src/main/java/seaweedfs/client/SeaweedRead.java @@ -119,6 +119,10 @@ public class SeaweedRead { } } + if (chunkView.isCompressed) { + data = Gzip.decompress(data); + } + LOG.debug("doFetchFullChunkData fid:{} chunkData.length:{}", chunkView.fileId, data.length); return data; -- cgit v1.2.3 From 9981748498113759eaedf2efdb93a5b485d8c55a Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Sun, 2 Aug 2020 23:50:44 -0700 Subject: only try to cache chunk manifest chunks --- .../client/src/main/java/seaweedfs/client/FileChunkManifest.java | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'other/java/client/src/main') diff --git a/other/java/client/src/main/java/seaweedfs/client/FileChunkManifest.java b/other/java/client/src/main/java/seaweedfs/client/FileChunkManifest.java index 28c2f47fc..1248ff13f 100644 --- a/other/java/client/src/main/java/seaweedfs/client/FileChunkManifest.java +++ b/other/java/client/src/main/java/seaweedfs/client/FileChunkManifest.java @@ -76,8 +76,11 @@ public class FileChunkManifest { LOG.debug("doFetchFullChunkData:{}", chunkView); chunkData = SeaweedRead.doFetchFullChunkData(chunkView, locations); } - LOG.debug("chunk {} size {}", chunkView.fileId, chunkData.length); - SeaweedRead.chunkCache.setChunk(chunkView.fileId, chunkData); + if(chunk.getIsChunkManifest()){ + // only cache manifest chunks + LOG.debug("chunk {} size {}", chunkView.fileId, chunkData.length); + SeaweedRead.chunkCache.setChunk(chunkView.fileId, chunkData); + } return chunkData; -- cgit v1.2.3 From d6073f638670c24f3eb189b7d74dfaebd477fbff Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Mon, 3 Aug 2020 00:40:23 -0700 Subject: ensure GC --- other/java/client/src/main/java/seaweedfs/client/ChunkCache.java | 1 + 1 file changed, 1 insertion(+) (limited to 'other/java/client/src/main') diff --git a/other/java/client/src/main/java/seaweedfs/client/ChunkCache.java b/other/java/client/src/main/java/seaweedfs/client/ChunkCache.java index 58870d742..7afa2dca0 100644 --- a/other/java/client/src/main/java/seaweedfs/client/ChunkCache.java +++ b/other/java/client/src/main/java/seaweedfs/client/ChunkCache.java @@ -15,6 +15,7 @@ public class ChunkCache { } this.cache = CacheBuilder.newBuilder() .maximumSize(maxEntries) + .weakValues() .expireAfterAccess(1, TimeUnit.HOURS) .build(); } -- cgit v1.2.3