Implement PBEWITHHMACSHA512ANDAES_256 of java jasypt in python - java

I am trying to encrypt a password in Python and decrypt it in a Java Spring Boot application using the Jasypt library (through the Jasypt plugin).
What i have done so far
For simplicity i have used a zero salt and a fixed iv
I have written the python script to perform the encryption using hselvarajan's pkcs12kdf
import sys
import math
import base64
import hashlib
from Crypto.Cipher import AES
from Crypto.Hash import SHA512
from binascii import hexlify
from binascii import unhexlify
# Interpreter major-version flags; they select the bytes -> native-string
# helper defined just below.
PY2 = sys.version_info.major == 2
PY3 = sys.version_info.major == 3

if PY2:
    def str_encode(s):
        """Return *s* converted with ``str()`` (Python 2 native string)."""
        return str(s)
elif PY3:
    def str_encode(s):
        """Decode the byte string *s* as UTF-8 into a native ``str``."""
        return str(s, 'utf-8')
# Iteration count handed to the key-derivation function.
iterations = 10000
# AES block size in bytes (taken from the cipher module).
salt_block_size = AES.block_size
# Requested derived-key length in bits.
key_size = 256
password = "test1"
plaintext_to_encrypt = "password1"
# NOTE: salt and iv are sixteen ASCII '0' characters (byte value 0x30 each),
# not sixteen NUL (0x00) bytes.
salt = "0000000000000000"
iv = "0000000000000000"
# -----------------------------------------------------------------------------
# This is a pure copy paste of
# https://github.com/hselvarajan/pkcs12kdf/blob/master/pkcs12kdf.py
# -----------------------------------------------------------------------------
class PKCS12KDF:
    """Generate keys and initialization vectors from passwords (RFC 7292, B.2).

    The password is encoded as a NUL-terminated, big-endian UTF-16 string
    (BMPString) as the RFC requires.  Only constructs available on both
    Python 2.7 and Python 3 are used.

    :param password: text password, or ``None`` for "no password".
    :param salt: salt as a byte string (text is encoded as UTF-8);
        ``None`` is treated as an empty salt.
    :param iteration_count: how many times the hash is applied per block.
    :param hash_algorithm: algorithm name understood by ``hashlib.new``.
    :param key_length_bits: amount of material to derive, in bits.
    """

    #
    # IDs for Key and IV material as in RFC
    #
    KEY_MATERIAL = 1
    IV_MATERIAL = 2

    def __init__(self, password, salt, iteration_count, hash_algorithm, key_length_bits):
        self._password = password
        self._salt = salt
        self._iteration_count = iteration_count
        self._block_size_bits = None
        self._hash_length_bits = None
        # Floor division: the value is used as a slice bound, so it must stay
        # an int on Python 3 as well.
        self._key_length_bytes = key_length_bits // 8
        self._key = None
        self._iv = None
        self._hash_algorithm = hash_algorithm

    @staticmethod
    def byte_array_to_long(byte_array, nbytes=None):
        """Interpret *byte_array* as a big-endian, non-negative integer.

        When *nbytes* is given only the trailing *nbytes* bytes are used.
        An empty input yields ``0``.
        """
        data = bytes(bytearray(byte_array))
        if nbytes is not None:
            data = data[-nbytes:]
        if not data:
            return 0
        # bytes -> hex -> int works identically on Python 2 and 3.
        return int(hexlify(data), 16)

    @staticmethod
    def long_to_byte_array(val, nbytes=None):
        """Encode the non-negative integer *val* as big-endian bytes.

        Without *nbytes* the shortest whole-byte encoding is returned.  With
        *nbytes* the result is exactly that long: left-padded with zero bytes,
        or reduced to the low-order *nbytes* bytes when *val* is too large.
        """
        hexval = '%x' % val
        if nbytes is None:
            if len(hexval) % 2:
                hexval = '0' + hexval
        else:
            hexval = hexval[-nbytes * 2:].rjust(nbytes * 2, '0')
        return unhexlify(hexval)

    @staticmethod
    def _fill(data, length):
        """Repeat *data*, truncating the final copy, to exactly *length* bytes."""
        data = bytearray(data)
        if not data or length <= 0:
            return bytearray()
        copies = -(-length // len(data))
        return (data * copies)[:length]

    def _password_bytes(self):
        """Return the password as a NUL-terminated big-endian UTF-16 string."""
        password = self._password
        if password is None:
            return b''
        if isinstance(password, bytes):
            # Python 2 ``str`` (or Python 3 ``bytes``): turn into text first.
            password = password.decode('utf-8')
        return (password + u'\0').encode('utf-16-be')

    def _salt_bytes(self):
        """Return the salt as a byte string (``None`` means empty)."""
        salt = self._salt
        if salt is None:
            return b''
        if isinstance(salt, (bytes, bytearray)):
            return bytes(salt)
        return salt.encode('utf-8')

    #
    # Run the PKCS12 algorithm for either the key or the IV, specified by id
    #
    def generate_derived_parameters(self, id):
        """Derive ``key_length_bits / 8`` bytes of material for diversifier *id*.

        :param id: ``KEY_MATERIAL`` or ``IV_MATERIAL`` (the RFC's ID byte).
        :returns: the derived bytes.
        :raises NotImplementedError: if the hash algorithm is unavailable or
            has no fixed digest size.
        """
        if self._hash_algorithm not in hashlib.algorithms_available:
            raise NotImplementedError("Hash function: "+self._hash_algorithm+" not available")
        probe = hashlib.new(self._hash_algorithm)
        v = probe.block_size     # hash block size, bytes
        u = probe.digest_size    # hash output length, bytes
        if not u or not v:
            raise NotImplementedError("Hash function: "+self._hash_algorithm+" not available")
        r = self._iteration_count
        n = self._key_length_bytes

        # Step 1: D (the "diversifier") is v copies of ID.
        D = bytearray([id]) * v
        # Steps 2-4: S and P are salt and password repeated up to the next
        # multiple of v bytes (empty input stays empty); I = S || P.
        salt = self._salt_bytes()
        password = self._password_bytes()
        S = self._fill(salt, v * -(-len(salt) // v))
        P = self._fill(password, v * -(-len(password) // v))
        I = S + P

        # Step 5: c = ceil(n / u) hash-sized blocks are needed.
        c = -(-n // u)
        modulus = 1 << (8 * v)
        A = bytearray()
        for _ in range(c):
            # Step 6A: Ai = H^r(D || I), always with the configured hash.
            hash_function = hashlib.new(self._hash_algorithm)
            hash_function.update(D)
            hash_function.update(I)
            Ai = hash_function.digest()
            for _ in range(1, r):
                hash_function = hashlib.new(self._hash_algorithm)
                hash_function.update(Ai)
                Ai = hash_function.digest()
            # Step 7 (incremental): A = A1 || A2 || ... || Ac.
            A += Ai
            # Step 6B: B is Ai repeated/truncated to v bytes.
            increment = self.byte_array_to_long(self._fill(Ai, v)) + 1
            # Step 6C: every v-byte block Ij of I becomes (Ij + B + 1) mod 2^(8v).
            for offset in range(0, len(I), v):
                block = self.byte_array_to_long(I[offset:offset + v])
                I[offset:offset + v] = self.long_to_byte_array(
                    (block + increment) % modulus, v
                )
        # Step 8: the first n bytes of A are the result.
        return bytes(A[:n])

    #
    # Generate the key and IV
    #
    def generate_key_and_iv(self):
        """Derive, store and return ``(key, iv)``; both have the key length."""
        self._key = self.generate_derived_parameters(self.KEY_MATERIAL)
        self._iv = self.generate_derived_parameters(self.IV_MATERIAL)
        return self._key, self._iv
# -----------------------------------------------------------------------------
# Main execution
# -----------------------------------------------------------------------------
# Derive the AES key with the PKCS#12 KDF.  The IV it derives (iv_tmp) is not
# used; the fixed ``iv`` configured at the top of the script is used instead.
kdf = PKCS12KDF(
    password=password,
    salt=salt,
    iteration_count=iterations,
    hash_algorithm="sha512",
    key_length_bits=key_size,
)
(key, iv_tmp) = kdf.generate_key_and_iv()
aes_key = key[:32]

# The cipher operates on byte strings.  Encoding explicitly keeps the script
# working on Python 3 and is a no-op for these ASCII values on Python 2.
iv_bytes = iv.encode('utf-8')
plaintext_bytes = plaintext_to_encrypt.encode('utf-8')

# PKCS#7 padding computed on the encoded length: append ``pad`` bytes, each
# carrying the value ``pad``.
pad = salt_block_size - len(plaintext_bytes) % salt_block_size
plaintext_to_encrypt = plaintext_bytes + bytes(bytearray([pad]) * pad)
cipher = AES.new(aes_key, AES.MODE_CBC, iv_bytes)
encrypted = cipher.encrypt(plaintext_to_encrypt)
# The emitted value is Base64(iv + ciphertext); the salt is not prepended.
result = str_encode(base64.b64encode(iv_bytes + encrypted))
# Python output : MDAwMDAwMDAwMDAwMDAwMKWsWH+Ku37n7ddfj0ayxp8=
# Java output : MDAwMDAwMDAwMDAwMDAwMAtqAfBtuxf+F5qqzC8QiFc=
print(result)
Run it as
python2.7 test-PBEWITHHMACSHA512ANDAES_256.py
paxYf4q7fuft11+PRrLGnw==
I have written a unit test in jasypt repository to decrypt
See PBEWITHHMACSHA512ANDAES_256EncryptorTest.
Run it as
$ cd jasypt
$ mvn clean test -Dtest=org.jasypt.encryption.pbe.PBEWITHHMACSHA512ANDAES_256EncryptorTest
The problem: The above setup produces different results in python and in java
Python output : MDAwMDAwMDAwMDAwMDAwMKWsWH+Ku37n7ddfj0ayxp8=
Java output : MDAwMDAwMDAwMDAwMDAwMAtqAfBtuxf+F5qqzC8QiFc=
What i know
The failure is due to not using the correct key in Python. After adding additional logging, the error is:
EncryptionOperationNotPossibleException: javax.crypto.BadPaddingException: Given final block not properly padded. Such issues can arise if a bad key is used during decryption.
The PBEWITHHMACSHA512ANDAES_256 uses the pkcs12 key derivation function.
I do not understand where the HMAC is being used.
I have also tried using the following implementations, to no avail. I am getting an error in all of them.
oscrypto
python-hkdf
Cryptodome.Protocol.KDF HKDF
I do not understand where the iterations are being used here.
self.aes_key = HKDF(master = self.password, key_len = 32, salt = self.salt, hashmod = SHA512, num_keys = 1)
I would like some guidance on what i am doing wrong. Any help, any pointers would be much appreciated.
Update following Cryptodome's PBKDF2 and AES
Here is the python script
import sys
import base64
from Cryptodome.Cipher import AES
from Cryptodome.Hash import SHA512
from Cryptodome.Protocol.KDF import PBKDF2
from Cryptodome.Util.Padding import pad
# PBKDF2 iteration count.
iterations = 10000
password = b'test1'
plaintext_to_encrypt = b'password1'
# NOTE: salt and iv are sixteen ASCII '0' characters (0x30), not NUL bytes.
salt = b'0000000000000000'
iv = b'0000000000000000'
# -----------------------------------------------------------------------------
# Main execution
# -----------------------------------------------------------------------------
# Derive 64 bytes with PBKDF2-HMAC-SHA512; only the first 32 are used as key.
keys = PBKDF2(password, salt, 64, count=iterations, hmac_hash_module=SHA512)
aes_key = keys[:32]
# AES in CBC mode; pad() brings the plaintext up to a whole number of blocks.
cipher = AES.new(aes_key, AES.MODE_CBC, iv)
ct_bytes = cipher.encrypt(pad(plaintext_to_encrypt, AES.block_size))
encrypted = base64.b64encode(ct_bytes).decode('utf-8')
# Only the Base64 ciphertext is emitted here; neither the salt nor the IV
# is prepended to it.
result = encrypted
# Python output : 6tCAZbswCh9DZ1EK8utRuA==
# Java output : C2oB8G27F/4XmqrMLxCIVw==
print(result)
and its output
python2.7 test-PBEWITHHMACSHA512ANDAES_256-2.py
6tCAZbswCh9DZ1EK8utRuA==
I try to decrypt it in java with the following error using the test
mvn clean test -Dtest=org.jasypt.encryption.pbe.PBEWITHHMACSHA512ANDAES_256EncryptorTest
[...]
Running org.jasypt.encryption.pbe.PBEWITHHMACSHA512ANDAES_256EncryptorTest
Test encr: C2oB8G27F/4XmqrMLxCIVw==
Error: javax.crypto.BadPaddingException: Given final block not properly padded. Such issues can arise if a bad key is used during decryption.Tests run: 1, Failures: 0, Errors: 1, Skipped: 0, Time elapsed: 0.524 sec <<< FAILURE!
test1(org.jasypt.encryption.pbe.PBEWITHHMACSHA512ANDAES_256EncryptorTest) Time elapsed: 0.522 sec <<< ERROR!
org.jasypt.exceptions.EncryptionOperationNotPossibleException
at org.jasypt.encryption.pbe.StandardPBEByteEncryptor.decrypt(StandardPBEByteEncryptor.java:1173)
at org.jasypt.encryption.pbe.StandardPBEStringEncryptor.decrypt(StandardPBEStringEncryptor.java:738)
at org.jasypt.encryption.pbe.PBEWITHHMACSHA512ANDAES_256EncryptorTest.test1(PBEWITHHMACSHA512ANDAES_256EncryptorTest.java:27)
Results :
Tests in error:
test1(org.jasypt.encryption.pbe.PBEWITHHMACSHA512ANDAES_256EncryptorTest)
Tests run: 1, Failures: 0, Errors: 1, Skipped: 0
[INFO] ------------------------------------------------------------------------
[INFO] BUILD FAILURE
[INFO] ------------------------------------------------------------------------
[INFO] Total time: 8.648 s
[INFO] Finished at: 2020-06-24T17:40:04+08:00
[INFO] ------------------------------------------------------------------------
[ERROR] Failed to execute goal org.apache.maven.plugins:maven-surefire-plugin:2.12.4:test (default-test) on project jasypt: There are test failures.
[ERROR]
[ERROR] Please refer to /space/openbet/git/github-jasypt-jasypt/jasypt/target/surefire-reports for the individual test results.
[ERROR] -> [Help 1]
[ERROR]
[ERROR] To see the full stack trace of the errors, re-run Maven with the -e switch.
[ERROR] Re-run Maven using the -X switch to enable full debug logging.
[ERROR]
[ERROR] For more information about the errors and possible solutions, please read the following articles:
[ERROR] [Help 1] http://cwiki.apache.org/confluence/display/MAVEN/MojoFailureException

PBEWITHHMACSHA512ANDAES_256 applies PBKDF2 to generate the key. Encryption is performed with AES-256, CBC.
The (originally) posted Jasypt test function used RandomIvGenerator, which creates a random IV. For the salt, ZeroSaltGenerator is applied, which generates a salt consisting of 16 zero bytes.
To implement the Python function you are looking for, it is best to use a fixed IV, e.g. with StringFixedIvGenerator. StringFixedSaltGenerator provides a corresponding functionality for the salt (FixedStringSaltGenerator has the same functionality but is deprecated since 1.9.2). StringFixedSaltGenerator and StringFixedIvGenerator encode the passed string with UTF-8 by default (but another encoding can be specified), so that the salt (or IV) 0000000000000000 is hex encoded 0x30303030303030303030303030303030.
Note that a fixed salt and IV may only be used for testing. In practice, a new random salt and a new random IV must be used for each encryption. Since salt and IV are not secret, they are usually concatenated with the ciphertext on byte level (e.g. in the order salt, iv, ciphertext) and sent to the receiver, who separates the parts and uses them for decryption.
If the same parameters (especially the same salt and IV) are used on both sides, then encryption with Python and decryption with Java works.
Encryption with Python (PyCryptodome):
import base64
from Cryptodome.Cipher import AES
from Cryptodome.Hash import SHA512
from Cryptodome.Protocol.KDF import PBKDF2
from Cryptodome.Util.Padding import pad
# Key generation (PBKDF2)
iterations = 10000
password = b'test1'
plaintext_to_encrypt = b'password1'
# Fixed salt and IV: acceptable for a reproducible test only.
salt = b'5432109876543210'
iv = b'0123456789012345'
# 32-byte (256-bit) key from PBKDF2 with HMAC-SHA512.
key = PBKDF2(password, salt, 32, count=iterations, hmac_hash_module=SHA512)
# Encryption (AES-256, CBC)
cipher = AES.new(key, AES.MODE_CBC, iv)
# pad() extends the plaintext to a whole number of AES blocks.
ct_bytes = cipher.encrypt(pad(plaintext_to_encrypt, AES.block_size))
# Only the ciphertext is Base64-encoded; salt and IV are not included.
encrypted = base64.b64encode(ct_bytes).decode('utf-8')
print(encrypted) # Output: kzLd5qPlCLnHq5sT7LOXzQ==
Decryption with Java (Jasypt):
// Configure the encryptor with the same password, salt, IV, iteration count
// and algorithm that were used on the Python side.
StandardPBEStringEncryptor encryptor = new StandardPBEStringEncryptor();
encryptor.setPassword("test1");
// Fixed salt and IV generators so that the values match the Python script.
encryptor.setSaltGenerator(new StringFixedSaltGenerator("5432109876543210"));
encryptor.setIvGenerator(new StringFixedIvGenerator("0123456789012345"));
encryptor.setKeyObtentionIterations(10000);
encryptor.setAlgorithm("PBEWITHHMACSHA512ANDAES_256");
// Input is the Base64 ciphertext printed by the Python script.
String decryptedMsg = encryptor.decrypt("kzLd5qPlCLnHq5sT7LOXzQ==");
System.out.println("Test decr: " + decryptedMsg); // Output: Test decr: password1

By the way, if anyone is still looking for this answer but with random salt and IV, it seems like they are appended to the cyphertext in order. Here is the encryption/decryption solution that is compatible with PBEWithHMACSHA512AndAES_256:
from base64 import b64decode, b64encode
from cryptography.fernet import Fernet
from cryptography.hazmat.primitives import hashes
from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC
from cryptography.hazmat.backends import default_backend
from cryptography.hazmat.primitives.padding import PKCS7
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
KEY = b'my awesome key'
def decrypt_pbe_with_hmac_sha512_aes_256(obj: str, *, password: bytes = None, iterations: int = 1000) -> str:
    """Decrypt a Base64 ``salt || iv || ciphertext`` value.

    The key is derived with PBKDF2-HMAC-SHA512 (32 bytes) and the payload is
    decrypted with AES-CBC, then PKCS#7-unpadded.

    :param obj: Base64 text holding a 16-byte salt, a 16-byte IV and the
        ciphertext, in that order.
    :param password: password bytes; defaults to the module-level ``KEY``.
    :param iterations: PBKDF2 iteration count; must match the value used for
        encryption.
    :returns: the decoded clear text.
    :raises ValueError: if the input is too short to contain salt, IV and at
        least one cipher block, or if decryption/unpadding fails.
    """
    encrypted_obj = b64decode(obj)
    # 16 bytes salt + 16 bytes IV + at least one 16-byte AES block.
    if len(encrypted_obj) < 48:
        raise ValueError("encrypted value is too short to contain salt, IV and ciphertext")
    salt = encrypted_obj[:16]
    iv = encrypted_obj[16:32]
    cypher_text = encrypted_obj[32:]

    # re-generate the key from the password and the embedded salt
    kdf = PBKDF2HMAC(hashes.SHA512(), 32, salt, iterations, backend=default_backend())
    key = kdf.derive(KEY if password is None else password)

    # decrypt
    cipher = Cipher(algorithms.AES(key), modes.CBC(iv), backend=default_backend())
    decryptor = cipher.decryptor()
    padded_text = decryptor.update(cypher_text) + decryptor.finalize()

    # remove padding (PKCS7 takes the block size in bits)
    unpadder = PKCS7(128).unpadder()
    clear_text = unpadder.update(padded_text) + unpadder.finalize()
    return clear_text.decode()
def encrypt_pbe_with_hmac_sha512_aes_256(obj: str, salt: bytes = None, iv: bytes = None, *, password: bytes = None, iterations: int = 1000) -> str:
    """Encrypt *obj* and return Base64 of ``salt || iv || ciphertext``.

    The key is derived with PBKDF2-HMAC-SHA512 (32 bytes); the text is
    PKCS#7-padded and encrypted with AES-CBC.

    :param obj: clear text to encrypt.
    :param salt: 16-byte salt; a random one is generated when omitted.
    :param iv: 16-byte IV; a random one is generated when omitted.
    :param password: password bytes; defaults to the module-level ``KEY``.
    :param iterations: PBKDF2 iteration count.
    :returns: Base64 text of salt, IV and ciphertext concatenated.
    :raises ValueError: if *salt* or *iv* is not exactly 16 bytes long.
    """
    # ``os`` is not imported at the top of this snippet, so import it here.
    import os

    # generate key
    salt = salt or os.urandom(16)
    iv = iv or os.urandom(16)
    # The output layout reserves exactly 16 bytes each for salt and IV; any
    # other length would make the result impossible to split again.
    if len(salt) != 16 or len(iv) != 16:
        raise ValueError("salt and iv must each be exactly 16 bytes long")
    kdf = PBKDF2HMAC(hashes.SHA512(), 32, salt, iterations, backend=default_backend())
    key = kdf.derive(KEY if password is None else password)

    # pad data (PKCS7 takes the block size in bits)
    padder = PKCS7(128).padder()
    data = padder.update(obj.encode()) + padder.finalize()

    # encrypt
    cipher = Cipher(algorithms.AES(key), modes.CBC(iv), backend=default_backend())
    encryptor = cipher.encryptor()
    cypher_text = encryptor.update(data) + encryptor.finalize()
    return b64encode(salt + iv + cypher_text).decode()
Then you can use it directly using the base64 output of Jasypt:
>>> decrypt_pbe_with_hmac_sha512_aes_256(encrypt_pbe_with_hmac_sha512_aes_256('hello world'))
'hello world'

Related

Difference between Go DSA and Java DSA [closed]

Closed. This question needs details or clarity. It is not currently accepting answers.
Want to improve this question? Add details and clarify the problem by editing this post.
Closed 1 year ago.
Improve this question
Go generates a signature using a DSA private key
Java verifies first step result using the DSA public key
Java should return true, but returns false
package main
import (
"crypto/dsa"
"crypto/rand"
"encoding/asn1"
"encoding/hex"
"fmt"
"golang.org/x/crypto/ssh"
"math/big"
)
// main parses a PEM-encoded DSA private key, signs a one-byte message with
// it and prints the ASN.1 DER encoding of the (R, S) signature as hex.
// Every failure aborts with a panic instead of being silently ignored.
func main() {
	// a dsa private key
	pemData := []byte("-----BEGIN DSA PRIVATE KEY-----\n" +
		"MIIBvAIBAAKBgQD9f1OBHXUSKVLfSpwu7OTn9hG3UjzvRADDHj+AtlEmaUVdQCJR\n" +
		"+1k9jVj6v8X1ujD2y5tVbNeBO4AdNG/yZmC3a5lQpaSfn+gEexAiwk+7qdf+t8Yb\n" +
		"+DtX58aophUPBPuD9tPFHsMCNVQTWhaRMvZ1864rYdcq7/IiAxmd0UgBxwIVAJdg\n" +
		"UI8VIwvMspK5gqLrhAvwWBz1AoGBAPfhoIXWmz3ey7yrXDa4V7l5lK+7+jrqgvlX\n" +
		"TAs9B4JnUVlXjrrUWU/mcQcQgYC0SRZxI+hMKBYTt88JMozIpuE8FnqLVHyNKOCj\n" +
		"rh4rs6Z1kW6jfwv6ITVi8ftiegEkO8yk8b6oUZCJqIPf4VrlnwaSi2ZegHtVJWQB\n" +
		"TDv+z0kqAoGBAIb9o0KPsjAdzjK571e1Mx7ZhEyJGrcxHiN2sW8IztEbqrKKiMxp\n" +
		"NlTwm234uBdtzVHE3uDWZpfHPMIRmwBjCYDFRowWWVRdhdFXZlpCyp1gMWqJ11dh\n" +
		"3FI3+O43DevRSyyuLRVCNQ1J3iVgwY5ndRpZU7n6y8DPH4/4EBT7KvnVAhR4Vwun\n" +
		"Fhu/+4AGaVeMEa814I3dqg==\n" +
		"-----END DSA PRIVATE KEY-----")

	// parse dsa
	p, err := ssh.ParseRawPrivateKey(pemData)
	if err != nil {
		panic(err)
	}
	// ParseRawPrivateKey returns an interface value; make sure it really is
	// a DSA key before using it.
	pp, ok := p.(*dsa.PrivateKey)
	if !ok {
		panic(fmt.Sprintf("expected *dsa.PrivateKey, got %T", p))
	}

	// orign data
	// NOTE(review): this single byte is handed to dsa.Sign as-is; no hash is
	// computed here — confirm the verifying side expects the same input.
	hashed := []byte{1}
	r, s, err := dsa.Sign(rand.Reader, pp, hashed)
	if err != nil {
		panic(err)
	}

	// ASN.1 SEQUENCE { R INTEGER, S INTEGER }
	type dsaSignature struct {
		R, S *big.Int
	}
	signatureBytes, err := asn1.Marshal(dsaSignature{R: r, S: s})
	if err != nil {
		panic(err)
	}

	// print sign
	fmt.Println(hex.EncodeToString(signatureBytes))
}
Java reads the DSA public key and initialize a signer
Java verify first step sign result
returns false
#Test
public void ttt() throws InvalidKeySpecException, NoSuchAlgorithmException, InvalidKeyException, SignatureException {
// DSA public key
String pubKey = "-----BEGIN PUBLIC KEY-----\n" +
"MIIBuDCCASwGByqGSM44BAEwggEfAoGBAP1/U4EddRIpUt9KnC7s5Of2EbdSPO9E\n" +
"AMMeP4C2USZpRV1AIlH7WT2NWPq/xfW6MPbLm1Vs14E7gB00b/JmYLdrmVClpJ+f\n" +
"6AR7ECLCT7up1/63xhv4O1fnxqimFQ8E+4P208UewwI1VBNaFpEy9nXzrith1yrv\n" +
"8iIDGZ3RSAHHAhUAl2BQjxUjC8yykrmCouuEC/BYHPUCgYEA9+GghdabPd7LvKtc\n" +
"NrhXuXmUr7v6OuqC+VdMCz0HgmdRWVeOutRZT+ZxBxCBgLRJFnEj6EwoFhO3zwky\n" +
"jMim4TwWeotUfI0o4KOuHiuzpnWRbqN/C/ohNWLx+2J6ASQ7zKTxvqhRkImog9/h\n" +
"WuWfBpKLZl6Ae1UlZAFMO/7PSSoDgYUAAoGBAIb9o0KPsjAdzjK571e1Mx7ZhEyJ\n" +
"GrcxHiN2sW8IztEbqrKKiMxpNlTwm234uBdtzVHE3uDWZpfHPMIRmwBjCYDFRowW\n" +
"WVRdhdFXZlpCyp1gMWqJ11dh3FI3+O43DevRSyyuLRVCNQ1J3iVgwY5ndRpZU7n6\n" +
"y8DPH4/4EBT7KvnV\n" +
"-----END PUBLIC KEY-----";
String publicKeyPEM = pubKey
.replace("-----BEGIN PUBLIC KEY-----\n", "")
.replaceAll(System.lineSeparator(), "")
.replace("-----END PUBLIC KEY-----", "");
byte[] publicEncoded = Base64.decodeBase64(publicKeyPEM);
KeyFactory keyFactory1 = KeyFactory.getInstance("DSA");
X509EncodedKeySpec publicKeySpec = new X509EncodedKeySpec(publicEncoded);
DSAPublicKey pubKeyy = (DSAPublicKey) keyFactory1.generatePublic(publicKeySpec);
// init signer
Signature sig1 = Signature.getInstance("DSA");
sig1.initVerify(pubKeyy);
sig1.update(new byte[]{1});
// verify first result
System.out.println(sig1.verify(HexUtil.decodeHex("first step result")));
}
I tried to use NONEwithDSA within the Java implementation, but it didn't work:
Signature sig1 = Signature.getInstance("NONEwithDSA");
java.security.SignatureException: Data for RawDSA must be exactly 20 bytes long
I tried to use SHA1withDSA within the Java implementation, but it didn't work either:
Signature sig1 = Signature.getInstance("SHA1withDSA");
returns false
In Java the (Signature) algorithm name DSA is an alias for SHA1withDSA, i.e. the original FIPS186-0 algorithm. This is not the same as the nonstandard 'raw' primitive apparently implemented by Go. NONEwithDSA is indeed the correct Java name for what you want, but the implementation in the 'standard' (SUN) provider is something of a kludge that requires exactly 20 bytes of data, not more or less, because that was the size of the SHA1 hash which was the only standard hash for DSA prior to FIPS186-3.
If you (have or can get and) use the BouncyCastle provider, it does not have this restriction, and should work for your code changed to NONEwithDSA (and either the code or security config modified so that BC is selected as the provider, of course).
If you don't use Bouncy, I think you'll have to code the algorithm yourself; I don't think there's any way to get the SUN implementation to do what you want.
Although it would be better to sign a properly-sized hash as specified in the standard, not raw data, and then you could use the Java providers as specified and designed.

python kivy android app crashes after running apk on mobile device

I was trying to develop an android app using the kivy python framework. The program connects with a remote mysql database. A part of the code (registration and login page) was tested in pyCharm and found to be working perfectly. For converting to an android app, Ubuntu 19.10 OS running on Oracle VM VirtualBox was used. APK file was obtained by running command buildozer android debug. But on running the command buildozer android deploy run, the following output with error message comes and app crashes.
List of devices attached
ZX1PC222GV device
Run on ZX1PC222GV
Run '/home/nirmal/.buildozer/android/platform/android-sdk/platform-tools/adb shell am start -n org.test.kkfoodies/org.kivy.android.PythonActivity -a org.kivy.android.PythonActivity'
Cwd /home/nirmal/.buildozer/android/platform
Starting: Intent { act=org.kivy.android.PythonActivity
cmp=org.test.kkfoodies/org.kivy.android.PythonActivity }
Error type 3
Error: Activity class {org.test.kkfoodies/org.kivy.android.PythonActivity} does not exist.
Application started
Here is my buildozer.spec file
[app]
# (str) Title of your application
title = KK Foodies
# (str) Package name
package.name = kkfoodies
# (str) Package domain (needed for android/ios packaging)
package.domain = org.test
# (str) Source code where the main.py live
source.dir = .
# (list) Source files to include (let empty to include all the files)
source.include_exts = py,png,jpg,kv,atlas
# (list) List of inclusions using pattern matching
#source.include_patterns = assets/*,images/*.png
# (list) Source files to exclude (let empty to not exclude anything)
#source.exclude_exts = spec
# (list) List of directory to exclude (let empty to not exclude anything)
#source.exclude_dirs = tests, bin
# (list) List of exclusions using pattern matching
#source.exclude_patterns = license,images/*/*.jpg
# (str) Application versioning (method 1)
version = 1.0
# (str) Application versioning (method 2)
# version.regex = __version__ = ['"](.*)['"]
# version.filename = %(source.dir)s/main.py
# (list) Application requirements
# comma separated e.g. requirements = sqlite3,kivy
requirements = python3,kivy
# (str) Custom source folders for requirements
# Sets custom source for any requirements with recipes
# requirements.source.kivy = ../../kivy
# (list) Garden requirements
#garden_requirements =
# (str) Presplash of the application
#presplash.filename = %(source.dir)s/data/presplash.png
# (str) Icon of the application
#icon.filename = %(source.dir)s/data/icon.png
# (str) Supported orientation (one of landscape, sensorLandscape, portrait or all)
orientation = portrait
# (list) List of service to declare
#services = NAME:ENTRYPOINT_TO_PY,NAME2:ENTRYPOINT2_TO_PY
#
# OSX Specific
#
#
# author = © Copyright Info
# change the major version of python used by the app
osx.python_version = 3
# Kivy version to use
osx.kivy_version = 1.9.1
#
# Android specific
#
# (bool) Indicate if the application should be fullscreen or not
fullscreen = 0
# (string) Presplash background color (for new android toolchain)
# Supported formats are: #RRGGBB #AARRGGBB or one of the following names:
# red, blue, green, black, white, gray, cyan, magenta, yellow, lightgray,
# darkgray, grey, lightgrey, darkgrey, aqua, fuchsia, lime, maroon, navy,
# olive, purple, silver, teal.
#android.presplash_color = #FFFFFF
# (list) Permissions
#android.permissions = INTERNET
# (int) Target Android API, should be as high as possible.
#android.api = 27
# (int) Minimum API your APK will support.
android.minapi = 21
# (int) Android SDK version to use
#android.sdk = 20
# (str) Android NDK version to use
#android.ndk = 17c
# (int) Android NDK API to use. This is the minimum API your app will support, it should usually match android.minapi.
#android.ndk_api = 21
# (bool) Use --private data storage (True) or --dir public storage (False)
#android.private_storage = True
# (str) Android NDK directory (if empty, it will be automatically downloaded.)
#android.ndk_path =
# (str) Android SDK directory (if empty, it will be automatically downloaded.)
#android.sdk_path =
# (str) ANT directory (if empty, it will be automatically downloaded.)
#android.ant_path =
# (bool) If True, then skip trying to update the Android sdk
# This can be useful to avoid excess Internet downloads or save time
# when an update is due and you just want to test/build your package
# android.skip_update = False
# (bool) If True, then automatically accept SDK license
# agreements. This is intended for automation only. If set to False,
# the default, you will be shown the license when first running
# buildozer.
# android.accept_sdk_license = False
# (str) Android entry point, default is ok for Kivy-based app
#android.entrypoint = org.renpy.android.PythonActivity
# (str) Android app theme, default is ok for Kivy-based app
# android.apptheme = "@android:style/Theme.NoTitleBar"
# (list) Pattern to whitelist for the whole project
#android.whitelist =
# (str) Path to a custom whitelist file
#android.whitelist_src =
# (str) Path to a custom blacklist file
#android.blacklist_src =
# (list) List of Java .jar files to add to the libs so that pyjnius can access
# their classes. Don't add jars that you do not need, since extra jars can slow
# down the build process. Allows wildcards matching, for example:
# OUYA-ODK/libs/*.jar
#android.add_jars = foo.jar,bar.jar,path/to/more/*.jar
# (list) List of Java files to add to the android project (can be java or a
# directory containing the files)
#android.add_src =
# (list) Android AAR archives to add (currently works only with sdl2_gradle
# bootstrap)
#android.add_aars =
# (list) Gradle dependencies to add (currently works only with sdl2_gradle
# bootstrap)
#android.gradle_dependencies =
# (list) add java compile options
# this can for example be necessary when importing certain java libraries using the 'android.gradle_dependencies' option
# see https://developer.android.com/studio/write/java8-support for further information
# android.add_compile_options = "sourceCompatibility = 1.8", "targetCompatibility = 1.8"
# (list) Gradle repositories to add {can be necessary for some android.gradle_dependencies}
# please enclose in double quotes
# e.g. android.gradle_repositories = "maven { url 'https://kotlin.bintray.com/ktor' }"
#android.add_gradle_repositories =
# (list) packaging options to add
# see https://google.github.io/android-gradle-dsl/current/com.android.build.gradle.internal.dsl.PackagingOptions.html
# can be necessary to solve conflicts in gradle_dependencies
# please enclose in double quotes
# e.g. android.add_packaging_options = "exclude 'META-INF/common.kotlin_module'", "exclude 'META-INF/*.kotlin_module'"
#android.add_gradle_repositories =
# (list) Java classes to add as activities to the manifest.
#android.add_activites = com.example.ExampleActivity
# (str) OUYA Console category. Should be one of GAME or APP
# If you leave this blank, OUYA support will not be enabled
#android.ouya.category = GAME
# (str) Filename of OUYA Console icon. It must be a 732x412 png image.
#android.ouya.icon.filename = %(source.dir)s/data/ouya_icon.png
# (str) XML file to include as an intent filters in <activity> tag
#android.manifest.intent_filters =
# (str) launchMode to set for the main activity
#android.manifest.launch_mode = standard
# (list) Android additional libraries to copy into libs/armeabi
#android.add_libs_armeabi = libs/android/*.so
#android.add_libs_armeabi_v7a = libs/android-v7/*.so
#android.add_libs_arm64_v8a = libs/android-v8/*.so
#android.add_libs_x86 = libs/android-x86/*.so
#android.add_libs_mips = libs/android-mips/*.so
# (bool) Indicate whether the screen should stay on
# Don't forget to add the WAKE_LOCK permission if you set this to True
#android.wakelock = False
# (list) Android application meta-data to set (key=value format)
#android.meta_data =
# (list) Android library project to add (will be added in the
# project.properties automatically.)
#android.library_references =
# (list) Android shared libraries which will be added to AndroidManifest.xml using <uses-library> tag
#android.uses_library =
# (str) Android logcat filters to use
#android.logcat_filters = *:S python:D
# (bool) Copy library instead of making a libpymodules.so
#android.copy_libs = 1
# (str) The Android arch to build for, choices: armeabi-v7a, arm64-v8a, x86, x86_64
android.arch = armeabi-v7a
#
# Python for android (p4a) specific
#
# (str) python-for-android fork to use, defaults to upstream (kivy)
#p4a.fork = kivy
# (str) python-for-android branch to use, defaults to master
#p4a.branch = master
# (str) python-for-android git clone directory (if empty, it will be automatically cloned from github)
#p4a.source_dir =
# (str) The directory in which python-for-android should look for your own build recipes (if any)
#p4a.local_recipes =
# (str) Filename to the hook for p4a
#p4a.hook =
# (str) Bootstrap to use for android builds
# p4a.bootstrap = sdl2
# (int) port number to specify an explicit --port= p4a argument (eg for bootstrap flask)
#p4a.port =
#
# iOS specific
#
# (str) Path to a custom kivy-ios folder
#ios.kivy_ios_dir = ../kivy-ios
# Alternately, specify the URL and branch of a git checkout:
ios.kivy_ios_url = https://github.com/kivy/kivy-ios
ios.kivy_ios_branch = master
# Another platform dependency: ios-deploy
# Uncomment to use a custom checkout
#ios.ios_deploy_dir = ../ios_deploy
# Or specify URL and branch
ios.ios_deploy_url = https://github.com/phonegap/ios-deploy
ios.ios_deploy_branch = 1.7.0
# (str) Name of the certificate to use for signing the debug version
# Get a list of available identities: buildozer ios list_identities
#ios.codesign.debug = "iPhone Developer: <lastname> <firstname> (<hexstring>)"
# (str) Name of the certificate to use for signing the release version
#ios.codesign.release = %(ios.codesign.debug)s
[buildozer]
# (int) Log level (0 = error only, 1 = info, 2 = debug (with command output))
log_level = 2
# (int) Display warning if buildozer is run as root (0 = False, 1 = True)
warn_on_root = 1
# (str) Path to build artifact storage, absolute or relative to spec file
# build_dir = ./.buildozer
# (str) Path to build output (i.e. .apk, .ipa) storage
# bin_dir = ./bin
# -----------------------------------------------------------------------------
# List as sections
#
# You can define all the "list" as [section:key].
# Each line will be considered as a option to the list.
# Let's take [app] / source.exclude_patterns.
# Instead of doing:
#
#[app]
#source.exclude_patterns = license,data/audio/*.wav,data/images/original/*
#
# This can be translated into:
#
#[app:source.exclude_patterns]
#license
#data/audio/*.wav
#data/images/original/*
#
# -----------------------------------------------------------------------------
# Profiles
#
# You can extend section / key with a profile
# For example, you want to deploy a demo version of your application without
# HD content. You could first change the title to add "(demo)" in the name
# and extend the excluded directories to remove the HD content.
#
#[app#demo]
#title = My Application (demo)
#
#[app:source.exclude_patterns#demo]
#images/hd/*
#
# Then, invoke the command line with the "demo" profile:
#
#buildozer --profile demo android debug
The python file is given below.
from kivy.app import App
from kivy.properties import ObjectProperty
from kivy.uix.boxlayout import BoxLayout
from kivy.uix.relativelayout import RelativeLayout
from kivy.uix.scrollview import ScrollView
from kivy.uix.label import Label
from kivy.uix.button import Button
from kivy.uix.togglebutton import ToggleButton
from kivy.uix.checkbox import CheckBox
from kivy.uix.spinner import Spinner
from kivy.properties import NumericProperty #, ListProperty
from kivy.uix.textinput import TextInput
from kivy.uix.popup import Popup
# from kivy.uix.bubble import Bubble
from kivy.uix.image import Image
from kivy.lang import Builder
import MySQLdb
import hashlib, binascii, os
from kivy.uix.screenmanager import ScreenManager, Screen
# from datetime import date, timedelta
from kivy.uix.popup import Popup
from datepicker import DatePicker # , CalendarWidget
import base64
from functools import partial
import textwrap
# import numpy as np
# import cv2
# import io
# import PIL.Image
#_imaging = PIL.Image.core
# from PIL import Image
# from PIL.Image import core as _imaging
# import Image
# import sys
# import cStringIO
# import timepicker
# from kivy.garden.circulardatetimepicker import CircularTimePicker
from kivy.core.window import Window
# Window.clearcolor = (204/255, 1, 244/255, 0)
class ScreenManagement(ScreenManager):
    """ScreenManager subclass that adds no behaviour of its own.

    NOTE(review): presumably its screens are declared in a kv rule -- confirm.
    """
# Module-level session state; LoginWindow.validate_user assigns myname and
# roomno from the row it looks up.
myname = ''
selldate = ''
roomno = ''

# The connection is opened once, at import time, and shared by every screen
# through the module-level cursor below.
try:
    dbconnect = MySQLdb.connect("IP", "username", "password",
                                "dbname")
except MySQLdb.Error as e:  # , MySQLdb.Warning
    print("Can't connect to database", e)
    # Raise SystemExit directly: the bare exit() helper is injected by the
    # `site` module (absent in some embedded/frozen interpreters) and would
    # report success (status 0) even though start-up failed.
    raise SystemExit(1)

crsr = dbconnect.cursor()
class ListHeader(Button):
    """Button subclass that currently adds nothing to ``Button``."""

    # bcolor = ListProperty([1, 1, 1, 1])

    def __init__(self, **kwargs):
        # Forward every keyword argument unchanged to the Button constructor.
        super(ListHeader, self).__init__(**kwargs)
class ListCell(Label):
    """Label subclass that currently adds nothing to ``Label``."""

    # bcolor = ListProperty([1, 1, 1, 1])

    def __init__(self, **kwargs):
        # Forward every keyword argument unchanged to the Label constructor.
        super(ListCell, self).__init__(**kwargs)
class DbCon:
    """Data-access helper for the ``residents`` table.

    All queries go through the module-level ``crsr`` cursor and are committed
    on the module-level ``dbconnect`` connection.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def get_row(self, uname):
        """Fetch the resident row whose ``username`` equals ``uname``.

        Returns whatever ``crsr.fetchone()`` yields: a row of (username,
        passwordh, emp_no, full_name, room_no, authorisation_flag), or the
        driver's "no row" value when nothing matches.
        """
        # The user name comes straight from a text field, so it is passed as a
        # bound parameter instead of being spliced into the SQL text.
        query = ("SELECT username, passwordh, emp_no, full_name, room_no, "
                 "authorisation_flag FROM residents WHERE username = %s")
        print(query)
        crsr.execute(query, (uname,))
        return crsr.fetchone()

    def add_row(self, username, password, empnum, fullname, roomnum):
        """Insert a new resident, storing a salted hash instead of the password."""
        hashedpwd = self.hash_password(password)
        # The hash is deliberately not printed: credentials do not belong in logs.
        sqlquery = ("INSERT INTO residents (username, passwordh, emp_no, "
                    "full_name, room_no) VALUES (%s, %s, %s, %s, %s)")
        insert_values = (username, hashedpwd, empnum, fullname, roomnum)
        crsr.execute(sqlquery, insert_values)
        dbconnect.commit()
        print(crsr.rowcount, " record inserted.")

    def hash_password(self, password):
        """Hash a password for storing.

        Returns a 192-character ASCII string: a 64-character hex salt followed
        by the 128-character hex PBKDF2-HMAC-SHA512 digest (10000 iterations).
        """
        # The salt is the hex form of a SHA-256 over 60 random bytes, so it is
        # always exactly 64 ASCII characters long.
        salt = hashlib.sha256(os.urandom(60)).hexdigest().encode('ascii')
        pwdhash = hashlib.pbkdf2_hmac('sha512', password.encode('utf-8'),
                                      salt, 10000)
        pwdhash = binascii.hexlify(pwdhash)
        return (salt + pwdhash).decode('ascii')
class LoginWindow(Screen):
    """Login screen: checks the entered credentials against the residents table."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.db = DbCon()

    def validate_user(self):
        """Validate the login form and switch to the ``menu`` screen on success.

        Feedback is written to the ``info`` widget. The module-level ``myname``
        and ``roomno`` are only updated once the password has been verified.
        """
        global myname, roomno
        info = self.ids.info
        uname = self.ids.username_field.text
        passw = self.ids.pwd_field.text

        if uname == '' or passw == '':
            info.text = '[color=#FF0000]username and/ or password required[/color]'
            return

        row = self.db.get_row(uname)
        # DB-API fetchone() returns None (not '') when no row matches; testing
        # truthiness covers both, so an unknown user no longer crashes on row[1].
        if not row:
            info.text = '[color=#FF0000]Invalid Username and/or Password[/color]'
            return

        # Neither the stored hash nor the typed password is printed: credentials
        # must not end up in console logs.
        if not self.verify_password(row[1], passw):
            info.text = '[color=#FF0000]Incorrect Password[/color]'
            return

        myname = row[3]
        roomno = row[4]
        if row[5] == 1:
            info.text = '[color=#00FF00]Logged In successfully!!![/color]'
            self.parent.current = 'menu'
        else:
            info.text = '[color=#FF0000]Sorry, you are not authorised. Please collect authorisation details from administrator.[/color]'

    def verify_password(self, stored_password, provided_password):
        """Verify a stored password against one provided by user.

        ``stored_password`` is a 64-character salt followed by the hex
        PBKDF2-HMAC-SHA512 digest (10000 iterations); returns True when hashing
        ``provided_password`` with that salt gives the same digest.
        """
        salt = stored_password[:64]
        stored_digest = stored_password[64:]
        pwdhash = hashlib.pbkdf2_hmac('sha512',
                                      provided_password.encode('utf-8'),
                                      salt.encode('ascii'),
                                      10000)
        return binascii.hexlify(pwdhash).decode('ascii') == stored_digest
class RegisterWindow(Screen):
    """Registration screen: validates the form and inserts a new resident."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.db2 = DbCon()

    def register_user(self):
        """Validate the registration form and store the resident when it is complete.

        Every problem found is appended to the ``info`` widget. The row is only
        inserted when user name, full name, employee number and room number are
        valid and both password fields are filled in and equal.
        """
        info = self.ids.info
        uname = self.ids.username_field.text
        passw = self.ids.pwd_field.text
        rpassw = self.ids.pwd_field_rpt.text
        name = self.ids.full_name.text
        enum = self.ids.emp_no.text
        rnum = self.ids.room_no.text

        info.text = ''

        unameokflag = uname != ''
        if not unameokflag:
            info.text += '[color=#FF0000]username required[/color]'

        if passw == '':
            info.text += '\n[color=#FF0000]password required[/color]'
        if rpassw == '':
            info.text += '\n[color=#FF0000]repeat password required[/color]'

        nameokflag = name != ''
        if not nameokflag:
            info.text += '\n[color=#FF0000]full name required[/color]'

        enumokflag = False
        if enum == '':
            info.text += '\n[color=#FF0000]mobile required[/color]'
        elif len(enum) != 10 or not enum.isdigit():
            info.text += '\n[color=#FF0000]not a valid mobile number[/color]'
        else:
            enumokflag = True

        rnumokflag = rnum != ''
        if not rnumokflag:
            info.text += '\n[color=#FF0000]room number required[/color]'

        # The password is only acceptable when BOTH fields are filled in and
        # equal. Previously an empty field fell through to the "ok" branch, so a
        # resident could be stored with an empty password.
        passwokflag = False
        if passw != '' and rpassw != '':
            if passw != rpassw:
                info.text += '\n[color=#FF0000]passwords do not match[/color]'
            else:
                passwokflag = True

        if unameokflag and passwokflag and nameokflag and enumokflag and rnumokflag:
            self.db2.add_row(uname, passw, enum, name, rnum)
            info.text += '\n[color=#FF0000]resident details successfully inserted[/color]'
class FoodiesApp(App):
    """Kivy application whose root widget is a ScreenManagement instance."""

    def build(self):
        # A new ScreenManagement is created and returned as the root widget.
        root = ScreenManagement()
        return root


# Start the application only when this file is run as a script.
if __name__ == "__main__":
    sa = FoodiesApp()
    sa.run()
Kindly help me solve the issue.
Let me answer my own question. The problem is resolved. The error "Error type 3 Error: Activity class {org.test.kkfoodies/org.kivy.android.PythonActivity} does not exist." was causing utter confusion. This error was coming because the app was deployed on an android mobile phone where the minimum api requirement is not satisfied. (android.minapi = 21 option in buildozer spec file). Changing the option value to less than 21 also did not help as the buildozer version was not supporting deployment on older APIs.
The app was then tested on android phone which met the minimum api requirement. But the python library mysqldb was causing issues. mysqldb option was added in the requirements attribute and the recipe named mysqldb was tried. But, both the attempts failed.
Finally, the source code file main.py was modified -- mysql.connector library was used in place of MySQLdb. Also mysql_connector option was added in the requirements attribute of buildozer.spec file. That did the trick. The app is working smoothly on android phones now (Android version Lollipop and later). I thank every one of you for the help extended.

MD5 calculation for multipart amazon s3 uploading. android/java [duplicate]

Files uploaded to Amazon S3 that are smaller than 5GB have an ETag that is simply the MD5 hash of the file, which makes it easy to check if your local files are the same as what you put on S3.
But if your file is larger than 5GB, then Amazon computes the ETag differently.
For example, I did a multipart upload of a 5,970,150,664 byte file in 380 parts. Now S3 shows it to have an ETag of 6bcf86bed8807b8e78f0fc6e0a53079d-380. My local file has an md5 hash of 702242d3703818ddefe6bf7da2bed757. I think the number after the dash is the number of parts in the multipart upload.
I also suspect that the new ETag (before the dash) is still an MD5 hash, but with some meta data included along the way from the multipart upload somehow.
Does anyone know how to compute the ETag using the same algorithm as Amazon S3?
Say you uploaded a 14MB file to a bucket without server-side encryption, and your part size is 5MB. Calculate 3 MD5 checksums corresponding to each part, i.e. the checksum of the first 5MB, the second 5MB, and the last 4MB. Then take the checksum of their concatenation. MD5 checksums are often printed as hex representations of binary data, so make sure you take the MD5 of the decoded binary concatenation, not of the ASCII or UTF-8 encoded concatenation. When that's done, add a hyphen and the number of parts to get the ETag.
Here are the commands to do it on Mac OS X from the console:
$ dd bs=1m count=5 skip=0 if=someFile | md5 >>checksums.txt
5+0 records in
5+0 records out
5242880 bytes transferred in 0.019611 secs (267345449 bytes/sec)
$ dd bs=1m count=5 skip=5 if=someFile | md5 >>checksums.txt
5+0 records in
5+0 records out
5242880 bytes transferred in 0.019182 secs (273323380 bytes/sec)
$ dd bs=1m count=5 skip=10 if=someFile | md5 >>checksums.txt
2+1 records in
2+1 records out
2599812 bytes transferred in 0.011112 secs (233964895 bytes/sec)
At this point all the checksums are in checksums.txt. To concatenate them and decode the hex and get the MD5 checksum of the lot, just use
$ xxd -r -p checksums.txt | md5
And now append "-3" to get the ETag, since there were 3 parts.
Notes
If you uploaded with aws-cli via aws s3 cp then you most likely have a 8MB chunksize. According to the docs, that is the default.
If the bucket has server-side encryption (SSE) turned on, the ETag won't be the MD5 checksum (see the API documentation). But if you're just trying to verify that an uploaded part matches what you sent, you can use the Content-MD5 header and S3 will compare it for you.
md5 on macOS just writes out the checksum, but md5sum on Linux/brew also outputs the filename. You'll need to strip that, but I'm sure there's some option to only output the checksums. You don't need to worry about whitespace cause xxd will ignore it.
Code Links
A Gist I wrote with a working script for macOS.
The project at s3md5.
Based on answers here, I wrote a Python implementation which correctly calculates both multi-part and single-part file ETags.
def calculate_s3_etag(file_path, chunk_size=8 * 1024 * 1024):
    """Return the quoted S3-style ETag of the file at ``file_path``.

    The file is read in ``chunk_size`` pieces. With zero or one piece the
    result is the plain MD5 hex digest in double quotes; with more pieces it
    is the MD5 of the concatenated binary piece digests, followed by ``-`` and
    the number of pieces.
    """
    with open(file_path, 'rb') as stream:
        # iter() with a sentinel stops at the first empty read, i.e. at EOF.
        part_hashes = [
            hashlib.md5(block)
            for block in iter(lambda: stream.read(chunk_size), b'')
        ]

    part_count = len(part_hashes)
    if part_count == 0:
        return '"{}"'.format(hashlib.md5().hexdigest())
    if part_count == 1:
        return '"{}"'.format(part_hashes[0].hexdigest())

    combined = hashlib.md5(b''.join(h.digest() for h in part_hashes))
    return '"{}-{}"'.format(combined.hexdigest(), part_count)
The default chunk_size is 8 MB used by the official aws cli tool, and it does multipart upload for 2+ chunks. It should work under both Python 2 and 3.
bash implementation
python implementation
The algorithm literally is (copied from the readme in the python implementation) :
md5 the chunks
glob the md5 strings together
convert the glob to binary
md5 the binary of the globbed chunk md5s
append "-Number_of_chunks" to the end of the md5 string of the binary
Here's yet another piece in this crazy AWS challenge puzzle.
FWIW, this answer assumes you already have figured out how to calculate the "MD5 of MD5 parts" and can rebuild your AWS Multi-part ETag from all the other answers already provided here.
What this answer addresses is the annoyance of having to "guess" or otherwise "divine" the original upload part size.
We use several different tools for uploading to S3 and they all seem to have different upload part sizes, so "guessing" really wasn't an option. Also, we have a lot of files that were historically uploaded when part sizes seemed to be different. Also, the old trick of using an internal server copy to force the creation of an MD5-type ETag also no longer works as AWS has changed their internal server copies to also use multi-part (just with a fairly large part size).
So...
How can you figure out the object's part size?
Well, if you first make a head_object request and detect that the ETag is a multi-part type ETag (includes a '-<partcount>' at the end), then you can make another head_object request, but with an additional part_number attribute of 1 (the first part). This follow-on head_object request will then return the content_length of the first part. Voilà... Now you know the part size that was used, and you can use that size to re-create your local ETag, which should match the S3 ETag created when the object was originally uploaded.
Additionally, if you wanted to be exact (perhaps some multi-part uploads were to use variable part sizes), then you could continue to call head_object requests with each part_number specified and calculate each part's MD5 from the returned parts content_length.
Hope that helps...
Not sure if it can help:
We're currently doing an ugly (but so far useful) hack to fix those wrong ETags in multipart-uploaded files, which consists of applying a change to the file in the bucket; that triggers an MD5 recalculation by Amazon, which changes the ETag to match the actual MD5 signature.
In our case:
File: bucket/Foo.mpg.gpg
ETag obtained: "3f92dffef0a11d175e60fb8b958b4e6e-2"
Do something with the file (rename it, add a meta-data like a fake header, among others)
Etag obtained: "c1d903ca1bb6dc68778ef21e74cc15b0"
We don't know the algorithm, but since we can "fix" the ETag we don't need to worry about it either.
Same algorithm, java version:
(BaseEncoding, Hasher, Hashing, etc. come from the Guava library.)
/**
 * Builds the checksum of an object that was created by a multipart upload.
 * <p>
 * The hex MD5 strings of the individual parts are concatenated and decoded to
 * raw bytes; the MD5 of those bytes, followed by "-" and the number of parts,
 * is returned.
 * <p>
 * Algorithm follows the AWS S3 implementation described at
 * https://github.com/Teachnova/s3md5
 *
 * @param md5s hex MD5 digest of each part, in upload order
 * @return the combined digest in hex, a hyphen, and the part count
 */
private static String calculateChecksumForMultipartUpload(List<String> md5s) {
    // Base16 decoding in Guava expects upper-case digits, hence toUpperCase().
    String joinedHex = String.join("", md5s);
    byte[] partDigests = BaseEncoding.base16().decode(joinedHex.toUpperCase());
    String combined = Hashing.md5().newHasher().putBytes(partDigests).hash().toString();
    return combined + "-" + md5s.size();
}
According to the AWS documentation the ETag isn't an MD5 hash for a multi-part upload nor for an encrypted object: http://docs.aws.amazon.com/AmazonS3/latest/API/RESTCommonResponseHeaders.html
Objects created by the PUT Object, POST Object, or Copy operation, or through the AWS Management Console, and are encrypted by SSE-S3 or plaintext, have ETags that are an MD5 digest of their object data.
Objects created by the PUT Object, POST Object, or Copy operation, or through the AWS Management Console, and are encrypted by SSE-C or SSE-KMS, have ETags that are not an MD5 digest of their object data.
If an object is created by either the Multipart Upload or Part Copy operation, the ETag is not an MD5 digest, regardless of the method of encryption.
In an above answer, someone asked if there was a way to get the md5 for files larger than 5G.
An answer that I could give for getting the MD5 value (for files larger than 5G) would be to either add it manually to the metadata, or use a program to do your uploads which will add the information.
For example, I used s3cmd to upload a file, and it added the following metadata.
$ aws s3api head-object --bucket xxxxxxx --key noarch/epel-release-6-8.noarch.rpm
{
"AcceptRanges": "bytes",
"ContentType": "binary/octet-stream",
"LastModified": "Sat, 19 Sep 2015 03:27:25 GMT",
"ContentLength": 14540,
"ETag": "\"2cd0ae668a585a14e07c2ea4f264d79b\"",
"Metadata": {
"s3cmd-attrs": "uid:502/gname:staff/uname:xxxxxx/gid:20/mode:33188/mtime:1352129496/atime:1441758431/md5:2cd0ae668a585a14e07c2ea4f264d79b/ctime:1441385182"
}
}
It isn't a direct solution using the ETag, but it is a way to populate the metadata you want (MD5) in a way you can access it. It will still fail if someone uploads the file without metadata.
Here is the algorithm in ruby...
require 'digest'

# PART_SIZE should match the chosen part size of the multipart upload
# Set here as 10MB
PART_SIZE = 1024 * 1024 * 10

class File
  # Yields successive chunks of at most part_size bytes until end of file.
  def each_part(part_size = PART_SIZE)
    yield read(part_size) until eof?
  end
end

hashes = []
# Open in binary mode so no newline/encoding translation alters the bytes
# being hashed, and use the block form so the handle is always closed.
File.open('<path_to_file>', 'rb') do |file|
  file.each_part do |part|
    hashes << Digest::MD5.hexdigest(part)
  end
end

# The ETag is the MD5 of the concatenated *binary* part digests, hence pack('H*').
multipart_hash = Digest::MD5.hexdigest([hashes.join].pack('H*'))
multipart_etag = "#{multipart_hash}-#{hashes.count}"
Thanks to Shortest Hex2Bin in Ruby and Multipart Uploads to S3 ...
node.js implementation -
const fs = require('fs');
const crypto = require('crypto');

// Part size assumed for the upload.
const chunk = 1024 * 1024 * 5; // 5MB

// Hex MD5 of a string or Buffer.
const md5 = data => crypto.createHash('md5').update(data).digest('hex');

/**
 * Compute the S3-style ETag of a file.
 * Files of at most one chunk get their plain MD5; larger files get the MD5 of
 * the concatenated binary chunk digests followed by "-<number of chunks>".
 */
const getEtagOfFile = (filePath) => {
  const contents = fs.readFileSync(filePath);
  if (contents.length <= chunk) {
    return md5(contents);
  }

  const chunksNumber = Math.ceil(contents.length / chunk);
  const md5Chunks = Array.from({ length: chunksNumber }, (_, i) =>
    md5(contents.slice(i * chunk, (i + 1) * chunk))
  );
  const combined = md5(Buffer.from(md5Chunks.join(''), 'hex'));
  return `${combined}-${chunksNumber}`;
};
And here is a PHP version of calculating the ETag:
function calculate_aws_etag($filename, $chunksize) {
    /*
    DESCRIPTION:
    - calculate Amazon AWS ETag used on the S3 service
    INPUT:
    - $filename : path to file to check
    - $chunksize : chunk size in Megabytes
    OUTPUT:
    - ETag (string), or false when the file cannot be opened
    */
    $chunkbytes = $chunksize*1024*1024;
    if (filesize($filename) < $chunkbytes) {
        // Smaller than one chunk: the ETag is the plain MD5 of the file.
        return md5_file($filename);
    }

    $handle = fopen($filename, 'rb');
    if ($handle === false) {
        return false;
    }

    // Loop on the read result instead of feof(): feof() only becomes true
    // after a read has hit end-of-file, so a file whose size is an exact
    // multiple of the chunk size used to contribute an extra, empty chunk.
    $concat = '';
    $parts = 0;
    while (true) {
        $buffer = fread($handle, $chunkbytes);
        if ($buffer === false || $buffer === '') {
            break;
        }
        // Raw (binary) digest; equivalent to hex2bin(md5($buffer)).
        $concat .= md5($buffer, true);
        $parts++;
        unset($buffer);
    }
    fclose($handle);

    return md5($concat) .'-'. $parts;
}
$etag = calculate_aws_etag('path/to/myfile.ext', 8);
And here is an enhanced version that can verify against an expected ETag - and even guess the chunksize if you don't know it!
function calculate_etag($filename, $chunksize, $expected = false) {
    /*
    DESCRIPTION:
    - calculate Amazon AWS ETag used on the S3 service
    INPUT:
    - $filename : path to file to check
    - $chunksize : chunk size in Megabytes
    - $expected : verify calculated etag against this specified etag and return true or false instead
    - if you make chunksize negative (eg. -8 instead of 8) the function will guess the chunksize by checking all possible sizes given the number of parts mentioned in $expected
    OUTPUT:
    - ETag (string)
    - or boolean true|false if $expected is set
    - false when the file cannot be opened
    */
    if ($chunksize < 0) {
        $do_guess = true;
        $chunksize = 0 - $chunksize;
    } else {
        $do_guess = false;
    }
    $chunkbytes = $chunksize*1024*1024;
    $filesize = filesize($filename);

    // Single-part case: the file is smaller than one chunk and the expected
    // value (if any) does not look like a multipart "<md5>-<parts>" ETag.
    if ($filesize < $chunkbytes && (!$expected || !preg_match("/^\\w{32}-\\w+$/", $expected))) {
        $return = md5_file($filename);
        if ($expected) {
            return strtolower($expected) === $return;
        }
        return $return;
    }

    $handle = fopen($filename, 'rb');
    if ($handle === false) {
        return false;
    }
    // Loop on the read result instead of feof(): feof() only becomes true
    // after a read has hit end-of-file, so a file whose size is an exact
    // multiple of the chunk size used to contribute an extra, empty chunk.
    $concat = '';
    $count = 0;
    while (true) {
        $buffer = fread($handle, $chunkbytes);
        if ($buffer === false || $buffer === '') {
            break;
        }
        // Raw (binary) digest; equivalent to hex2bin(md5($buffer)).
        $concat .= md5($buffer, true);
        $count++;
        unset($buffer);
    }
    fclose($handle);
    $return = md5($concat) .'-'. $count;

    if (!$expected) {
        return $return;
    }

    $expected = strtolower($expected);
    $matches = ($expected === $return);
    if ($matches || $do_guess == false || strlen($expected) == 32) {
        return $matches;
    }

    // Guess the chunk size from the part count at the end of $expected.
    if (!preg_match("/-(\\d+)$/", $expected, $match) || (int)$match[1] < 1) {
        return false;
    }
    $parts = (int)$match[1];
    $min_chunk = ceil($filesize / $parts /1024/1024);
    // With a single part there is no "parts - 1" bound (it would divide by
    // zero); only the smallest chunk size that holds the file is worth trying.
    $max_chunk = ($parts > 1) ? floor($filesize / ($parts-1) /1024/1024) : $min_chunk;
    for ($i = $min_chunk; $i <= $max_chunk; $i++) {
        if (calculate_aws_etag($filename, $i) === $expected) {
            return true;
        }
    }
    return false;
}
The short answer is that you take the 128bit binary md5 digest of each part, concatenate them into a document, and hash that document. The algorithm presented in this answer is accurate.
Note: the multipart ETAG form with the hyphen will change to the form without the hyphen if you "touch" the blob (even without modifying the content). That is, if you copy, or do an in-place copy of your completed multipart-uploaded object (aka PUT-COPY), S3 will recompute the ETAG with the simple version of the algorithm. i.e. the destination object will have an etag without the hyphen.
You've probably considered this already, but if your files are less than 5GB, and you already know their MD5s, and upload parallelization provides little to no benefit (e.g. you are streaming the upload from a slow network, or uploading from a slow disk), then you may also consider using a simple PUT instead of a multipart PUT, and pass your known Content-MD5 in your request headers -- amazon will fail the upload if they don't match. Keep in mind that you get charged for each UploadPart.
Furthermore, in some clients, passing a known MD5 for the input of a PUT operation will save the client from recomputing the MD5 during the transfer. In boto3 (python), you would use the ContentMD5 parameter of the client.put_object() method, for instance. If you omit the parameter, and you already knew the MD5, then the client would be wasting cycles computing it again before the transfer.
Working algorithm implemented in Node.js (TypeScript).
/**
* Generate an S3 ETAG for multipart uploads in Node.js
* An implementation of this algorithm: https://stackoverflow.com/a/19896823/492325
* Author: Richard Willis <willis.rh@gmail.com>
*/
import fs from 'node:fs';
import crypto, { BinaryLike } from 'node:crypto';
const defaultPartSizeInBytes = 5 * 1024 * 1024; // 5MB
/** Return the hex-encoded MD5 digest of `contents`. */
function md5(contents: string | BinaryLike): string {
  const hash = crypto.createHash('md5');
  hash.update(contents);
  return hash.digest('hex');
}
/**
 * Compute the multipart-style S3 ETag of a file.
 *
 * The file is read part by part; the hex MD5 of every part is concatenated,
 * decoded to bytes and hashed again, and "-<number of parts>" is appended.
 *
 * @param filePath path of the file to hash
 * @param partSizeInBytes part size that was used for the upload
 * @returns "<md5 of part digests>-<number of parts>"
 */
export function getS3Etag(
  filePath: string,
  partSizeInBytes = defaultPartSizeInBytes
): string {
  const { size: fileSizeInBytes } = fs.statSync(filePath);
  // Round the part count up so a trailing partial part is included.
  let parts = Math.floor(fileSizeInBytes / partSizeInBytes);
  if (fileSizeInBytes % partSizeInBytes > 0) {
    parts += 1;
  }
  const fileDescriptor = fs.openSync(filePath, 'r');
  let totalMd5 = '';
  try {
    for (let part = 0; part < parts; part++) {
      const skipBytes = partSizeInBytes * part;
      const totalBytesLeft = fileSizeInBytes - skipBytes;
      const bytesToRead = Math.min(totalBytesLeft, partSizeInBytes);
      const buffer = Buffer.alloc(bytesToRead);
      fs.readSync(fileDescriptor, buffer, 0, bytesToRead, skipBytes);
      totalMd5 += md5(buffer);
    }
  } finally {
    // The descriptor used to be leaked; close it even when a read throws.
    fs.closeSync(fileDescriptor);
  }
  const combinedHash = md5(Buffer.from(totalMd5, 'hex'));
  const etag = `${combinedHash}-${parts}`;
  return etag;
}
I've published this to npm
npm install s3-etag
import { generateETag } from 's3-etag';
const etag = generateETag(absoluteFilePath, partSizeInBytes);
View project here: https://github.com/badsyntax/s3-etag
A version in Rust:
use crypto::digest::Digest;
use crypto::md5::Md5;
use std::fs::File;
use std::io::prelude::*;
use std::iter::repeat;
/// Compute the S3-style ETag of everything readable from `f`.
///
/// The input is split into parts of `chunk_size` bytes. With more than one
/// part the result is the MD5 of the concatenated binary part digests followed
/// by `-<number of parts>`; otherwise it is the plain MD5 hex digest.
///
/// Errors from the underlying reader are returned unchanged.
fn calculate_etag_from_read(f: &mut dyn Read, chunk_size: usize) -> Result<String> {
    let mut md5 = Md5::new();
    let mut concat_md5 = Md5::new();
    let mut input_buffer = vec![0u8; chunk_size];
    let mut chunk_count = 0;
    let mut current_md5: Vec<u8> = repeat(0).take((md5.output_bits() + 7) / 8).collect();
    let md5_result = loop {
        // A single `read` may return fewer bytes than requested without being
        // at end of input, so keep reading until the part buffer is full or
        // the reader reports EOF (0 bytes). Otherwise part boundaries shift
        // and the resulting ETag is wrong.
        let mut filled = 0;
        while filled < chunk_size {
            let amount_read = f.read(&mut input_buffer[filled..])?;
            if amount_read == 0 {
                break;
            }
            filled += amount_read;
        }
        if filled > 0 {
            md5.reset();
            md5.input(&input_buffer[0..filled]);
            chunk_count += 1;
            md5.result(&mut current_md5);
            concat_md5.input(&current_md5);
        } else {
            if chunk_count > 1 {
                break format!("{}-{}", concat_md5.result_str(), chunk_count);
            } else {
                // Zero or one part: the digest of that single part (or of the
                // empty input) is the ETag.
                break md5.result_str();
            }
        }
    };
    Ok(md5_result)
}
/// Open the file at path `file` and compute its ETag with the given part size.
fn calculate_etag(file: &String, chunk_size: usize) -> Result<String> {
    let mut source = File::open(file)?;
    let etag = calculate_etag_from_read(&mut source, chunk_size);
    etag
}
See a repo with a simple implementation: https://github.com/bn3t/calculate-etag/tree/master
Regarding chunk size, I noticed that it seems to depend on the number of parts.
The maximum number of parts is 10000, as the AWS documentation states.
So starting on a default of 8MB and knowing the filesize, chunk size and parts can be calculated as follows:
# Start from the 8 MB default and keep doubling the chunk size until the file
# fits in at most 10000 parts; the part count is then rounded up.
flsz = os.path.getsize(fl)
chunk_size = 8 * 1024 * 1024
while flsz / chunk_size > 10000:
    chunk_size *= 2
parts = math.ceil(flsz / chunk_size)
Parts have to be up-rounded
Extending Timothy Gonzalez's answer:
Identical files will have different etag when using multipart upload.
It's easy to test it with WinSCP, because it uses multipart upload.
When I upload multiple identical copies of the same file to S3 via WinSCP, each one has a different etag. When I download them and calculate their md5, they are still identical.
So, from what I tested, different etags don't mean that the files are different.
I see no alternative way to obtain any hash for S3 files without downloading them first.
This is true for multipart uploads. For not-multipart it should still be possible to calculate etag locally.
I have a solution for iOS and macOS without using external helpers like dd and xxd. I have just found it, so I report it as it is, planning to improve it at a later stage. For the moment, it relies on both Objective-C and Swift code. First of all, create this helper class in Objective-C:
AWS3MD5Hash.h
#import <Foundation/Foundation.h>

NS_ASSUME_NONNULL_BEGIN

// Helper used from Swift to read slices of a file or of an NSData object and
// to turn a hex string into raw bytes.
@interface AWS3MD5Hash : NSObject

// Reads up to `length` bytes from `theFile`, starting at byte `startByte`.
- (NSData *)dataFromFile:(FILE *)theFile startingOnByte:(UInt64)startByte length:(UInt64)length filePath:(NSString *)path singlePartSize:(NSUInteger)partSizeInMb;

// Returns the sub-range of `theData` starting at `startByte`, at most `length` bytes long.
- (NSData *)dataFromBigData:(NSData *)theData startingOnByte:(UInt64)startByte length:(UInt64)length;

// Decodes a string of hexadecimal digit pairs into bytes.
- (NSData *)dataFromHexString:(NSString *)sourceString;

@end

NS_ASSUME_NONNULL_END
AWS3MD5Hash.m
#import "AWS3MD5Hash.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define SIZE 256

@implementation AWS3MD5Hash

// Reads up to `length` bytes from `theFile`, starting at byte `startByte`.
// Returns nil when seeking or allocating fails; returns fewer than `length`
// bytes when the file ends first. `path` and `partSizeInMb` are kept for
// interface compatibility but are no longer needed.
- (NSData *)dataFromFile:(FILE *)theFile startingOnByte:(UInt64)startByte length:(UInt64)length filePath:(NSString *)path singlePartSize:(NSUInteger)partSizeInMb {
    if (length == 0) {
        return [NSData data];
    }
    if (fseek(theFile, (long)startByte, SEEK_SET) != 0) {
        return nil;
    }
    char *buffer = malloc((size_t)length);
    if (buffer == NULL) {
        return nil;
    }
    // Item size 1 makes fread return the number of BYTES actually read. The
    // previous form (one item of `length` bytes) returned 0 on any short read
    // and then trusted "file size - startByte" as the byte count, which could
    // exceed the buffer when the read failed for a reason other than EOF.
    size_t bytesRead = fread(buffer, 1, (size_t)length, theFile);
    NSData *toReturn = [NSData dataWithBytes:buffer length:bytesRead];
    free(buffer);
    return toReturn;
}

// Returns the sub-range of `theData` starting at `startByte`, clamped to the
// end of the data; empty when `startByte` lies at or beyond the end.
- (NSData *)dataFromBigData:(NSData *)theData startingOnByte: (UInt64)startByte length:(UInt64)length {
    NSUInteger fileSizeValue = theData.length;
    if (startByte >= fileSizeValue) {
        // Avoids the unsigned underflow of fileSizeValue - startByte, which
        // produced an out-of-bounds range.
        return [NSData data];
    }
    NSUInteger available = fileSizeValue - (NSUInteger)startByte;
    NSUInteger count = (length < available) ? (NSUInteger)length : available;
    return [theData subdataWithRange:NSMakeRange((NSUInteger)startByte, count)];
}

// Decodes pairs of hexadecimal digits into bytes. A character that is not a
// hex digit is skipped when it appears where a pair would start (this is what
// lets callers separate digests with newlines).
- (NSData *)dataFromHexString:(NSString *)string {
    string = [string lowercaseString];
    NSMutableData *data= [NSMutableData new];
    unsigned char whole_byte;
    char byte_chars[3] = {'\0','\0','\0'};
    NSInteger i = 0;
    NSInteger length = string.length;
    while (i < length-1) {
        char c = [string characterAtIndex:i++];
        if (c < '0' || (c > '9' && c < 'a') || c > 'f')
            continue;
        byte_chars[0] = c;
        byte_chars[1] = [string characterAtIndex:i++];
        whole_byte = strtol(byte_chars, NULL, 16);
        [data appendBytes:&whole_byte length:1];
    }
    return data;
}

@end
Now create a plain swift file:
AWS Extensions.swift
import UIKit
import CommonCrypto
extension URL {
/// Computes an S3-style ETag for the file at this URL.
///
/// - Parameter numberOfParts: number of parts of the multipart upload; pass 0
///   to get the plain MD5 of the whole file (via `memoryFriendlyMd5Hash()`).
/// - Returns: "<md5 of the part digests>-<numberOfParts>", the plain MD5 when
///   `numberOfParts` is 0, or nil when reading the file attributes throws.
///
/// The part size is not passed in: it is estimated from the file size and the
/// number of parts (see below), so the result only matches when that estimate
/// equals the part size that was really used.
func calculateAWSS3MD5Hash(_ numberOfParts: UInt64) -> String? {
do {
var fileSize: UInt64!
var calculatedPartSize: UInt64!
let attr:NSDictionary? = try FileManager.default.attributesOfItem(atPath: self.path) as NSDictionary
if let _attr = attr {
fileSize = _attr.fileSize();
if numberOfParts != 0 {
// Estimate the part size in megabytes: integer division of the file size by
// the part count, rounded up to a whole megabyte and then up to an even number.
let partSize = Double(fileSize / numberOfParts)
var partSizeInMegabytes = Double(partSize / (1024.0 * 1024.0))
partSizeInMegabytes = ceil(partSizeInMegabytes)
calculatedPartSize = UInt64(partSizeInMegabytes)
if calculatedPartSize % 2 != 0 {
calculatedPartSize += 1
}
if numberOfParts == 2 || numberOfParts == 3 { // Very important when there are 2 or 3 parts, in the majority of times
// the calculatedPartSize is already 8. In the remaining cases we force it.
calculatedPartSize = 8
}
// `mainLogToggling` is not defined in this snippet; it must come from elsewhere in the project.
if mainLogToggling {
print("The calculated part size is \(calculatedPartSize!) Megabytes")
}
}
}
if numberOfParts == 0 {
let string = self.memoryFriendlyMd5Hash()
return string
}
let hasher = AWS3MD5Hash.init()
// NOTE(review): the fopen result is force-unwrapped below (`file!`), so a file
// that cannot be opened crashes here instead of returning nil.
let file = fopen(self.path, "r")
defer { let result = fclose(file)}
var index: UInt64 = 0
// Hex MD5 of every part, one per line; the newlines are skipped again by
// dataFromHexString when the string is decoded.
var bigString: String! = ""
var data: Data!
// The closure handles one part per call and returns false after the last
// part, which ends the (empty-bodied) while loop.
while autoreleasepool(invoking: {
if index == (numberOfParts-1) {
if mainLogToggling {
//print("We are at the last line.")
}
}
data = hasher.data(from: file!, startingOnByte: index * calculatedPartSize * 1024 * 1024, length: calculatedPartSize * 1024 * 1024, filePath: self.path, singlePartSize: UInt(calculatedPartSize))
bigString = bigString + MD5.get(data: data) + "\n"
index += 1
if index == numberOfParts {
return false
}
return true
}) {}
// MD5 over the decoded (binary) part digests, plus "-<number of parts>".
let final = MD5.get(data :hasher.data(fromHexString: bigString)) + "-\(numberOfParts)"
return final
} catch {
// Errors thrown by attributesOfItem are swallowed; the function falls through to nil.
}
return nil
}
/// Computes the MD5 of the file at this URL, reading it in 1 MiB pieces.
///
/// - Returns: the lowercase hex digest, or nil (after printing the error)
///   when the file cannot be opened for reading.
func memoryFriendlyMd5Hash() -> String? {
let bufferSize = 1024 * 1024
do {
// Open file for reading:
let file = try FileHandle(forReadingFrom: self)
defer {
file.closeFile()
}
// Create and initialize MD5 context:
var context = CC_MD5_CTX()
CC_MD5_Init(&context)
// Read up to `bufferSize` bytes, until EOF is reached, and update MD5 context:
// (all the work happens in the closure; its Bool result drives the empty-bodied loop)
while autoreleasepool(invoking: {
let data = file.readData(ofLength: bufferSize)
if data.count > 0 {
data.withUnsafeBytes {
_ = CC_MD5_Update(&context, $0, numericCast(data.count))
}
return true // Continue
} else {
return false // End of file
}
}) { }
// Compute the MD5 digest:
var digest = Data(count: Int(CC_MD5_DIGEST_LENGTH))
digest.withUnsafeMutableBytes {
_ = CC_MD5_Final($0, &context)
}
// Two lowercase hex digits per digest byte.
let hexDigest = digest.map { String(format: "%02hhx", $0) }.joined()
return hexDigest
} catch {
print("Cannot open file:", error.localizedDescription)
return nil
}
}
/// MD5 helpers that return the digest of a `Data` value as lowercase hex.
struct MD5 {
/// Hashes `data` with a single CC_MD5 call.
static func get(data: Data) -> String {
var digest = [UInt8](repeating: 0, count: Int(CC_MD5_DIGEST_LENGTH))
let _ = data.withUnsafeBytes { bytes in
CC_MD5(bytes, CC_LONG(data.count), &digest)
}
var digestHex = ""
for index in 0..<Int(CC_MD5_DIGEST_LENGTH) {
digestHex += String(format: "%02x", digest[index])
}
return digestHex
}
// The following is a memory friendly version
/// Hashes `data` incrementally, feeding the MD5 context 1 MiB sub-ranges at a time.
static func get2(data: Data) -> String {
var currentIndex = 0
let bufferSize = 1024 * 1024
//var digest = [UInt8](repeating: 0, count: Int(CC_MD5_DIGEST_LENGTH))
// Create and initialize MD5 context:
var context = CC_MD5_CTX()
CC_MD5_Init(&context)
// Each closure call hashes the next slice; it returns false once the slice
// taken from the current position is empty, which ends the empty-bodied loop.
while autoreleasepool(invoking: {
var subData: Data!
if (currentIndex + bufferSize) < data.count {
subData = data.subdata(in: Range.init(NSMakeRange(currentIndex, bufferSize))!)
currentIndex = currentIndex + bufferSize
} else {
// Last slice: whatever remains (possibly nothing).
subData = data.subdata(in: Range.init(NSMakeRange(currentIndex, data.count - currentIndex))!)
currentIndex = currentIndex + (data.count - currentIndex)
}
if subData.count > 0 {
subData.withUnsafeBytes {
_ = CC_MD5_Update(&context, $0, numericCast(subData.count))
}
return true
} else {
return false
}
}) { }
// Compute the MD5 digest:
var digest = Data(count: Int(CC_MD5_DIGEST_LENGTH))
digest.withUnsafeMutableBytes {
_ = CC_MD5_Final($0, &context)
}
var digestHex = ""
for index in 0..<Int(CC_MD5_DIGEST_LENGTH) {
digestHex += String(format: "%02x", digest[index])
}
return digestHex
}
}
Now add:
#import "AWS3MD5Hash.h"
to your Objective-C Bridging header. You should be ok with this setup.
Example usage
To test this setup, you could be calling the following method inside the object that is in charge of handling the AWS connections:
/// Example: fetch the ETag of an S3 object with a HEAD request and compare it
/// with the hash computed for the local copy of the file.
///
/// The `calculateAWSS3MD5Hash(...)` call below contains a placeholder argument
/// and must be filled in before this compiles.
func getMd5HashForFile() {
// Credentials come from a Cognito identity pool.
// NOTE(review): the credentials provider uses USEast2 while the service
// configuration uses APSoutheast2 — confirm both match your setup.
let credentialProvider = AWSCognitoCredentialsProvider(regionType: AWSRegionType.USEast2, identityPoolId: "<INSERT_POOL_ID>")
let configuration = AWSServiceConfiguration(region: AWSRegionType.APSoutheast2, credentialsProvider: credentialProvider)
// Both timeouts are set to 3.0 (presumably seconds — confirm against the SDK docs).
configuration?.timeoutIntervalForRequest = 3.0
configuration?.timeoutIntervalForResource = 3.0
AWSServiceManager.default().defaultServiceConfiguration = configuration
// Register an S3 client under "defaultKey" and fetch it back by the same key.
AWSS3.register(with: configuration!, forKey: "defaultKey")
let s3 = AWSS3.s3(forKey: "defaultKey")
// HEAD request for the object identified by self.latestMapOnServer.key.
let headObjectRequest = AWSS3HeadObjectRequest()!
headObjectRequest.bucket = "<NAME_OF_YOUR_BUCKET>"
headObjectRequest.key = self.latestMapOnServer.key
// The closure only runs when the HEAD request succeeds.
let _: AWSTask? = s3.headObject(headObjectRequest).continueOnSuccessWith { (awstask) -> Any? in
let headObjectOutput: AWSS3HeadObjectOutput? = awstask.result
var ETag = headObjectOutput?.eTag!
// Here you should parse the returned Etag and extract the number of parts to provide to the helper function. Etags end with a "-" followed by the number of parts. If you don't see this format, then pass 0 as the number of parts.
// Strip the double quotes from the ETag before comparing.
ETag = ETag!.replacingOccurrences(of: "\"", with: "")
print("headObjectOutput.ETag \(ETag!)")
// Local file to hash: self.latestMapOnDisk inside the maps directory.
let mapOnDiskUrl = self.getMapsDirectory().appendingPathComponent(self.latestMapOnDisk!)
// Placeholder argument: replace it with the part count parsed from the ETag.
let hash = mapOnDiskUrl.calculateAWSS3MD5Hash(<Take the number of parts from the ETag returned by the server>)
if hash == ETag {
print("They are the same.")
}
// NOTE(review): `hash!` is force-unwrapped and traps if the hash is nil.
print ("\(hash!)")
return nil
}
}
If the ETag returned by the server does not have "-" at the end of the ETag, just pass 0 to calculateAWSS3MD5Hash. Please comment if you encounter any problems. I am working on a swift only solution, I will update this answer as soon as I finish. Thanks
I just saw that the AWS S3 Console 'upload' uses an unusual part (chunk) size of 17,179,870 - at least for larger files.
Using that part size gave me the correct ETag hash using the methods described earlier. Thanks to @TheStoryCoder for the php version.
Thanks to @hans for his idea to use head-object to see the actual sizes of each part.
I used the AWS S3 Console (on Nov28 2020) to upload about 50 files ranging in size from 190MB to 2.3GB and all of them had the same part size of 17,179,870.
I liked Emerson's leading answer above - especially the xxd part - but I was too lazy to use dd so I went with split, guessing at an 8M chunk size because I uploaded with aws s3 cp:
$ split -b 8M large.iso XXX
$ md5sum XXX* > checksums.txt
$ sed -i 's/ .*$//' checksums.txt
$ xxd -r -p checksums.txt | md5sum
99a090df013d375783f0f0be89288529 -
$ wc -l checksums.txt
80 checksums.txt
$
It was immediately obvious that both parts of my S3 etag matched my file's calculated etag.
UPDATE:
This has been working nicely:
$ ll large.iso
-rw-rw-r-- 1 user user 669134848 Apr 12 2021 large.iso
$
$ etag large.iso
99a090df013d375783f0f0be89288529-80
$
$ type etag
etag is a function
etag ()
{
    # Usage: etag FILE [PART_SIZE]
    # Prints "<md5 of the concatenated binary part digests>-<part count>" for FILE.
    # PART_SIZE is passed to `split -b` and defaults to 8M.
    # "$1" is quoted so paths containing spaces or glob characters work.
    # Requires GNU split (--filter), xxd and pee (moreutils).
    split -b "${2:-8M}" --filter=md5sum "$1" | cut -d' ' -f1 | pee "xxd -r -p | md5sum | cut -d' ' -f1" "wc -l" | paste -d'-' - -
}
$
All the other answers assume a standard and regular part size. But that assumption may not be true. Across the console and various SDKs there are different defaults. And the low-level API does allow a lot of variety.
Complications:
S3 multi-part uploads can have parts of any size (within a min and max for non-last parts).
Even the non-last parts can be different sizes.
When you upload they don't have to be consecutive part numbers.
If you do a multi-part upload with only 1 part, the etag is the more complicated version, not the simple MD5
etags tend to be wrapped in double-quotes. I don't know why. But that's just a thing that might trip you up.
So we need to find out how many parts there are, and how big they are.
You cannot reliably get the part count from boto3's Object.parts_count attribute. I don't know if the same is true of other SDKs.
The get_object_attributes API documentation claims that it returns a list of parts and sizes. But when I tested those fields were missing. Even for multi-part uploads that were not completed.
Even if you assume equal part sizes (except the last part), you cannot deduce part size from content length and part count. e.g. if a 90MB file has 3 parts, was that 30MBx3, or 40MB+40MB+10MB?
Let's assume that you have a local file and you want to check whether it matches the content of the object in S3.
(And assume that you've already checked whether the lengths differ, because that's a faster check.)
Here's a python3 script to do that. (I chose python just because that's what I'm familiar with.)
We use head_object to get the e-tag. With the e-tag we can deduce whether it was a single-part upload or multi-part, and how many parts.
We use head_object passing in PartNumber, calling that for each part, to get the length of each part. You could use multiprocessing to speed that up. (Noting that boto3's client should not be passed between processes.)
import boto3
from hashlib import md5
def content_matches(local_path, bucket, key, client=None) -> bool:
    """Return True if the local file's computed ETag equals the S3 object's ETag.

    The remote ETag decides the algorithm: without a ``-`` it is compared
    with the plain MD5 of the file; with a ``-`` it is treated as a
    multi-part ETag, and the size of every part is fetched with
    ``head_object(PartNumber=...)`` so that non-uniform part sizes are
    handled.

    Args:
        local_path: Path of the local file to compare.
        bucket: Name of the S3 bucket.
        key: Key of the S3 object.
        client: Optional object exposing ``head_object`` (e.g. a boto3 S3
            client). When omitted, ``boto3.client('s3')`` is created.

    Returns:
        True when the locally computed ETag (including the surrounding
        double quotes) equals the remote one, else False. False is also
        returned when the local file is shorter than the remote parts.

    Raises:
        AssertionError: If the part sizes reported by S3 do not add up to
            the object's total content length.
    """
    if client is None:
        client = boto3.client('s3')
    resp = client.head_object(Bucket=bucket, Key=key)
    remote_e_tag = resp['ETag']
    total_length = resp['ContentLength']

    if '-' not in remote_e_tag:
        # Single-part upload: the ETag is the MD5 of the whole content.
        # Hashing in chunks gives the same digest as hashing in one go and
        # avoids loading the whole file into memory.
        digest = md5()
        with open(local_path, 'rb') as f:
            while chunk := f.read(1024 * 1024):
                digest.update(chunk)
        # The remote ETag is wrapped in double quotes, so wrap ours too.
        return f'"{digest.hexdigest()}"' == remote_e_tag

    # Multi-part upload: the part count is the suffix of the ETag,
    # e.g. 123-56 has 56 parts.
    num_parts = int(remote_e_tag.strip('"').split('-')[-1])
    print(f"Assuming {num_parts=} from {remote_e_tag=}")

    part_digests = []
    sz_read = 0
    with open(local_path, 'rb') as f:
        for part_num in range(1, num_parts + 1):
            part_resp = client.head_object(Bucket=bucket, Key=key, PartNumber=part_num)
            part_length = part_resp['ContentLength']
            sz_read += part_length
            local_data_part = f.read(part_length)
            if len(local_data_part) != part_length:
                # The local file ran out before the remote parts did.
                return False
            part_digests.append(md5(local_data_part).digest())
    assert sz_read == total_length, "Sum of part sizes doesn't equal total file size"

    # Multi-part ETag: MD5 over the concatenated binary part digests,
    # followed by "-<number of parts>".
    digests_md5 = md5(b''.join(part_digests))
    local_etag = f'"{digests_md5.hexdigest()}-{len(part_digests)}"'
    return remote_e_tag == local_etag
And a script to test it with all those edge cases:
import boto3
from pprint import pprint
from hashlib import md5
from main import content_matches
MB = 2 ** 20
bucket = 'mybucket'
key = 'test-multi-part-upload'
local_path = 'test-data'

# Start a multi-part upload for the target object.
s3 = boto3.resource('s3')
obj = s3.Object(bucket, key)
mpu = obj.initiate_multipart_upload()

parts = []
# Part sizes are deliberately non-standard and inconsistent, and the part
# numbers are deliberately non-consecutive.
part_sizes = [6 * MB, 5 * MB, 5]
upload_part_nums = [1, 3, 8]

# Each random chunk is uploaded as one part and also appended to the local
# file, so both sides end up with identical content.
with open(local_path, 'wb') as fw, open('/dev/random', 'rb') as fr:
    for part_num, part_size in zip(upload_part_nums, part_sizes):
        part = mpu.Part(part_num)
        data = fr.read(part_size)
        print(f"Uploading part {part_num}")
        resp = part.upload(Body=data)
        parts.append({'ETag': resp['ETag'], 'PartNumber': part_num})
        fw.write(data)

# Finish the upload, refresh the object, then verify it against the local file.
resp = mpu.complete(MultipartUpload={'Parts': parts})
obj.reload()
assert content_matches(local_path, bucket, key)
"@wim Any idea how to calculate the ETag when SSE is enabled?"
In my testing with multipart + SSE-C, the ETag is valid.
It can be calculated from the individual ETags returned for each part,
and this is easy to prove.
Let's say we have a multipart upload with SSE-C, with 10 parts.
Take the 10 ETags, put them in a file, and run "xxd -r -p checksums.txt | md5sum"; the calculated value will match the value returned from AWS.
etag parts
-------------------------------
1330e1275b556ab6702bca9438f62c15 -
ae55d3ddf52e33d45140a5be6dacb925 -
16dc956e05962b84ad9cd74a05e86797 -
64be66992a5110c4b1151a8249258a1a -
4926df0200fe24499524176d6a85e347 -
2b6655c3506481eb1fae6b2e2e7c4b8b -
a02e9dbd49039eaf4d6de1fddc5e1a30 -
afb7bc1f6e0c1f23671cb7116f3b0c63 -
dddf3a1ab192f26bb483a3e2778bab13 -
adb8b2b761640418856853f3810ac45a -
-------------------------------
etag_from_aws = c68db040f8a36c164259bcca40c36410-10
etag_calculated = c68db040f8a36c164259bcca40c36410-10
No,
So far there is no solution that makes the normal file ETag, the multipart file ETag and the MD5 of the local file all match.

jython Can't list error

I am trying to convert a python class into Java byte code with Jython (on mac osx lion)
./jython -m compileall /Users/owengerig/Downloads/Code\
Downloads/cryptPYTHON.py
but I get this error, which gives no indication of what's wrong
Listing /Users/owengerig/Downloads/Code Downloads/cryptPYTHON.py ...
Can't list /Users/owengerig/Downloads/Code Downloads/cryptPYTHON.py
How my python class is setup (used this post as example):
from Crypto.Cipher import AES
import base64
import os
class Crypticle(CryptInterface):
    """AES-CBC encryption helper keyed by a URL-safe base64 key string.

    * @param string $key base64-encoded encryption key
    * @param integer $key_len length of raw key in bits

    Encryption algorithm: AES-CBC
    Signing algorithm: HMAC-SHA256

    NOTE(review): no HMAC is computed or verified anywhere in this class, so
    the ciphertext is not authenticated despite the line above.
    The string handling (``str(...)``, ``chr``, ``ord``) is Python 2 style.
    """
    AES_BLOCK_SIZE = 16

    #JAVA
    def __init__(self, key_string, key_size=192):
        """Decode ``key_string`` and store it as the raw AES key.

        ``key_size`` is the key length in bits and must be a multiple of 8.
        """
        assert not key_size % 8
        self.key = self.extract_key(key_string, key_size)
        self.key_size = key_size

    @classmethod
    def generate_key_string(cls, key_size=192):
        """Return a fresh random key of ``key_size`` bits, URL-safe base64 encoded."""
        # Floor division keeps the byte count an integer on every Python version.
        key = os.urandom(key_size // 8)
        return base64.urlsafe_b64encode(str(key))

    @classmethod
    def extract_key(cls, key_string, key_size):
        """Decode ``key_string`` and check it is exactly ``key_size`` bits long."""
        key = base64.urlsafe_b64decode(str(key_string))
        assert len(key) == key_size // 8, "invalid key"
        return key

    #JAVA(String, String)
    def encrypt(self, data):
        """encrypt data with AES-CBC"""
        aes_key = self.key
        # Pad to a whole number of blocks; each pad byte holds the pad length,
        # and an already-aligned input gets one full extra block.
        pad = self.AES_BLOCK_SIZE - len(data) % self.AES_BLOCK_SIZE
        data = data + pad * chr(pad)
        iv_bytes = os.urandom(self.AES_BLOCK_SIZE)
        cypher = AES.new(aes_key, AES.MODE_CBC, iv_bytes)
        # The random IV is prepended so decrypt() can recover it.
        data = iv_bytes + cypher.encrypt(data)
        data_str = base64.urlsafe_b64encode(str(data))
        return data_str

    #JAVA(String, String)
    def decrypt(self, data_str):
        """decrypt data with AES-CBC"""
        aes_key = self.key
        data = base64.urlsafe_b64decode(data_str)
        # The first block is the IV written by encrypt(); the rest is ciphertext.
        iv_bytes = data[:self.AES_BLOCK_SIZE]
        data = data[self.AES_BLOCK_SIZE:]
        cypher = AES.new(aes_key, AES.MODE_CBC, iv_bytes)
        data = cypher.decrypt(data)
        # The last byte holds the pad length; strip that many bytes.
        return data[:-ord(data[-1])]
I also tried this code (per the comments below) but got the same error:
class Employee(Object):
    """Sample employee record with fixed values and Java-style getters."""

    def __init__(self):
        """Populate the record with the hard-coded sample values."""
        self.first, self.last, self.id = "Josh", "Juneau", "myempid"

    def getEmployeeFirst(self):
        """Return the employee's first name."""
        return self.first

    def getEmployeeLast(self):
        """Return the employee's last name."""
        return self.last

    def getEmployeeId(self):
        """Return the employee's id."""
        return self.id
-m compileall takes a directory, not a filename. So you need to execute the following:
./jython -m compileall /Users/owengerig/Downloads/Code\ Downloads/
Long Explanation
If you open jythondirectory/Lib/compileall.py:
try:
names = os.listdir(dir)
except os.error:
print "Can't list", dir
names = []
os.listdir() throws an error if it isn't passed a directory as its argument. Since this is the function used to compile the command-line arguments, and the main() function does not check if the arguments are directories, this will fail.
for dir in args:
if not compile_dir(dir, maxlevels, ddir,
force, rx, quiet):
success = 0
/Long Explanation

verify a rsa sign from java in php

Since a few days I've got a problem that I can't solve on my own:
On a JavaCard I generate a RSA KeyPair (length: 1024) and a signature (Mode:ALG_RSA_MD5_PKCS1).
Now I have to verify the signature in php.
From my JavaCard I get the exponent, modulus and the signature in hexadecimal:
$mod = '951ADDA04637190B6202BB52787D3C19160A383C80C2E7242D0A7850FDD80C1CD1CCCF1395F8CA0B20270E3BC6C86F78232D65D148258BEFD0884563C60AB2C327506FB4FA0095CF0B1C527D942155731451F790EC0A227D38613C9EBFB2E04A657B3BA5456B35F71E92E14B7E1CB38DB6572559BFCA3B0AD8AA061D48F68931';
$exp = '010001';
$sign ='75867D42BDE6DF1066D4AF69418FCDD4B0F19173141128DFEBC64AF6C014CB92D38F4824E52BB064A610E07C7783AE57AE993A792F15208FB199CB1F45B64623AACB7FBA07AD89513C8DBA893C9FA6939857AA2CA53AAD99D9A9C1C32DF4E2769FCACB72E2C2C495727D368D953A911D32E79E230751202714DD15C0B6A34782';
$plaintext = '01020304';
Verification in Java is no problem. But now I have to verify the signature in PHP (I use phpseclib).
In PHP I generate my public_key with CRYPT_RSA_PUBLIC_FORMAT_RAW:
$rsa = new Crypt_RSA();
$pk = array(
'e' => new Math_BigInteger($exp, 16),
'n' => new Math_BigInteger($mod, 16)
);
$rsa->loadKey($pk, CRYPT_RSA_PUBLIC_FORMAT_RAW);
$rsa->setSignatureMode(CRYPT_RSA_SIGNATURE_PKCS1);
echo $rsa->verify($plaintext, $sign) ? 'verified' : 'unverified';
The problem now is to pass the correct values to the verify function.
If I just set my signature in hexadecimal I get the notice:
Invalid signature: length = 256, k = 128 in C:\xampp\php\PEAR\Crypt\RSA.php on line 2175
So I have to customize the length of my signature:
$sign_bigInteger = new Math_BigInteger($sign, 16);
$sign_bytes = $sign_bigInteger->toBytes();
echo $rsa->verify($plaintext, $sign_bytes) ? 'verified' : 'unverified';
But the verification is false.
I get the output of the verification function in RSA.php (_rsassa_pkcs1_v1_5_verify) where plaintext is compared with the signature :
//sign
"ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ0 0*†H†÷ ÖÀZ!Q*y¡ßë*&/"
//plaintext
"ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ0!0 +•q£îê“O•äQ».åüÓSœÝ["
I don't really understand whats happening in the Class RSA.php.
Can anyone help me and say what I do wrong?
EDIT:
Now I tried to convert my hexString.
$plaintext_bin = pack("H*", $plaintext);
$sign_bin = pack("H*", $sign);
I think that my public key is generated correctly, so I just changed the input of my verify call:
$rsa->verify($plaintext_bin, $sign_bin) ? 'verified' : 'unverified';
Output:
em: string(128) "ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ0 0*†H†÷ ÖÀZ!Q*y¡ßë*&/"
em2: string(128) "ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ0!0 +ÚÚÿMG‡­ã31G ,;D>7o"
It's still not the same.
EDIT:
I fixed my problem. I forgot to set the Hash:
$rsa1->setHash('md5');
Now it works!
Thank you GregS.
All your values are hex strings. Just convert them using hex2bin() or pack("H*", $hex_string);

Categories