Standard Library

Hashing

Python’s standard library has you covered in the hashlib module. It includes the FIPS secure hash algorithms SHA1, SHA224, SHA256, SHA384, and SHA512 as well as RSA’s MD5 algorithm.

1
2
3
4
5
6
7
# Hash the same message with MD5 (incremental API) and SHA-256 (one-shot API).
import hashlib
md5 = hashlib.md5()
# Passing a str instead of bytes fails:
# md5.update('I Love Python')
# TypeError: Unicode-objects must be encoded before hashing
md5.update(b'I Love Python')
# Raw 16-byte MD5 digest, returned as bytes.
md5.digest()
# Data can also be passed to the constructor; hexdigest() returns a hex string.
hashlib.sha256(b'I Love Python').hexdigest()
b'K\xf2\xe0+\nV\xf6\x95\xb1\xd1d`\x85v\x9aJ'
'28ea18a0df1c8dbec5850c9164da84d27522189bc832703ab39d757d96fb0333'

Key Derivation

1
2
3
4
5
6
# Derive a key from a password with PBKDF2-HMAC-SHA256.
# `hashlib` is the module imported in the hashing example above.
import binascii
dk = hashlib.pbkdf2_hmac(hash_name='sha256',
password=b'password',
# NOTE(review): fixed salt keeps the demo reproducible; real code should use a random per-password salt.
salt=b'salt',
iterations=100000)
# Show the derived key as hexadecimal bytes.
binascii.hexlify(dk)
b'0394a2ede332c9a13eb82e9b24631604c31df978b4e2f0fbd2c549944f9d79a5'

PyCryptodome

Install

1
2
# pycryptodome installs its modules under the `Crypto` package (drop-in replacement for PyCrypto).
pip3 install pycryptodome --index-url https://mirrors.aliyun.com/pypi/simple/
# pycryptodomex installs the same code under the `Cryptodome` package, on every platform.
# The examples below import from `Cryptodome`, so this is the one they need.
pip3 install pycryptodomex --index-url https://mirrors.aliyun.com/pypi/simple/

DES(Data Encryption Standard)

Key: must be exactly 8 bytes long.
Pad function: pads the plaintext with trailing spaces until its length is a multiple of 8 bytes (the DES block size).

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
from Cryptodome.Cipher import DES

key = b'abcdefgh'

def pad(text, block_size=8):
    """Right-pad *text* with ASCII spaces to a multiple of *block_size* bytes.

    The length is measured in bytes, not decoded characters, so multi-byte
    UTF-8 input still ends up aligned to the cipher's block boundary.

    Args:
        text: The plaintext as a bytes-like object.
        block_size: Block length in bytes; defaults to 8, the DES block size.

    Returns:
        A ``bytes`` object whose length is a multiple of *block_size*. Input
        that is already aligned (including empty input) is returned unpadded.
    """
    raw = bytes(text)
    # -len % block_size is the distance to the next boundary (0 when aligned).
    return raw + b' ' * (-len(raw) % block_size)

# Encrypt and decrypt one message with DES in ECB mode, using the 8-byte `key` above.
# NOTE(review): DES and ECB are shown for illustration only; both are generally considered weak.
des = DES.new(key, DES.MODE_ECB)
text = b'I Love Python'
# Encrypting the 13-byte message directly fails:
# encrypted_text = des.encrypt(text)
# ValueError: Data must be aligned to block boundary in ECB mode
padded_text = pad(text)
# padded_text is b'I Love Python   ' (three trailing spaces, 16 bytes)
encrypted_text = des.encrypt(padded_text)
# Echo the ciphertext (first output line below).
encrypted_text
# Decrypting returns the padded plaintext; the trailing spaces are not removed.
des.decrypt(encrypted_text)
b'y\x9aM\xac\x17Y\x8f\xfc^\xa5p\x14Lq\xb6\x0e'
b'I Love Python   '

AES(Advanced Encryption Standard)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
from Cryptodome.Cipher import AES
from Cryptodome.Util.Padding import pad
from base64 import b64encode, b64decode

# AES in CBC mode with a base64-encoded key and a fixed IV, showing that a CBC
# cipher object is stateful between encrypt() calls.
# The key is stored as base64 text and decoded to raw bytes before use.
key = '/I02fMuSSvnouuu+/vyyD7NuSEVDB/0gte/z50dM0b4='
data = 'hello world!'

# NOTE(review): a constant IV keeps the printed values reproducible; real code should use a fresh random IV per message.
cipher = AES.new(b64decode(key), AES.MODE_CBC, iv=b'0123456789abcdef')
# `pad` here is Cryptodome.Util.Padding.pad: 12 bytes of data get four 0x04 bytes appended.
padded_data = pad(data.encode(), cipher.block_size)
print(padded_data)
# b'hello world!\x04\x04\x04\x04'
print(b64encode(padded_data))
# b'aGVsbG8gd29ybGQhBAQEBA=='
ciphertext = cipher.encrypt(padded_data)
print(ciphertext)
# b'\xf0m\n/e"\n\xfe\xd4\xa3\xeaJ2oF\xf8'
print(b64encode(ciphertext))
# b'8G0KL2UiCv7Uo+pKMm9G+A=='
# Encrypting the same plaintext again on the SAME cipher object gives a different
# ciphertext, because CBC chains from the previous ciphertext block.
ciphertext = cipher.encrypt(padded_data)
print(b64encode(ciphertext))
# b'tBXcf/Nf6MtxM1ulzNnIlw=='


# A fresh cipher object with the same key and IV reproduces the first ciphertext.
cipher = AES.new(b64decode(key), AES.MODE_CBC, iv=b'0123456789abcdef')
padded_data = pad(data.encode(), cipher.block_size)
ciphertext = cipher.encrypt(padded_data)
print(b64encode(ciphertext))
# b'8G0KL2UiCv7Uo+pKMm9G+A=='

RSA

If you want to encrypt your data with RSA, then you’ll need to either have access to a public / private RSA key pair or you will need to generate your own.
Let's just generate our own.

1
2
3
4
5
6
7
8
9
10
11
12
from Cryptodome.PublicKey import RSA
# Generate an RSA key pair and store both halves on disk.
code = 'nobodyknows'  # passphrase protecting the exported private key
key = RSA.generate(2048)  # an RSA key of 2048 bits

# Export the private key: `pkcs=8` selects the PKCS#8 container and
# `protection` selects the scheme used to encrypt it under the passphrase.
encrypted_key = key.export_key(passphrase=code, pkcs=8,
                               protection="scryptAndAES128-CBC")

with open('my_private_rsa_key.bin', 'wb') as f:
    f.write(encrypted_key)

# The public key is exported without encryption.
with open('my_rsa_public.pem', 'wb') as f:
    f.write(key.publickey().export_key())
1865
450

Encrypting a File

For this example we use a hybrid encryption scheme. A random AES session key is encrypted with the recipient's RSA public key using PKCS#1 OAEP (Optimal Asymmetric Encryption Padding), and the data itself is encrypted with AES under that session key, which lets us write data of arbitrary length to the file. We create the AES cipher, create some data and encrypt it; this returns the encrypted text and the MAC (or tag). The output file contains, in order, the RSA-encrypted session key, the nonce, the MAC (or tag) and the encrypted text.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
from Cryptodome.PublicKey import RSA
from Cryptodome.Random import get_random_bytes
from Cryptodome.Cipher import AES, PKCS1_OAEP

# Hybrid encryption: RSA-OAEP protects a random AES session key, and AES-EAX
# encrypts and authenticates the payload.

# Load the recipient's public key first, so that a missing or invalid key
# file does not leave an empty output file behind.
with open('my_rsa_public.pem', 'rb') as key_file:
    recipient_key = RSA.import_key(key_file.read())

session_key = get_random_bytes(16)
cipher_rsa = PKCS1_OAEP.new(recipient_key)

cipher_aes = AES.new(session_key, AES.MODE_EAX)
data = b'blah blah blah Python blah blah'
ciphertext, tag = cipher_aes.encrypt_and_digest(data)

# File layout: RSA-encrypted session key | nonce | tag | ciphertext
with open('encrypted_data.bin', 'wb') as out_file:
    out_file.write(cipher_rsa.encrypt(session_key))
    out_file.write(cipher_aes.nonce)
    out_file.write(tag)
    out_file.write(ciphertext)
256
16
16
31

Decrypt data

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
from Cryptodome.PublicKey import RSA
from Cryptodome.Cipher import AES, PKCS1_OAEP

# Reverse of the hybrid encryption example: recover the AES session key with
# the RSA private key, then decrypt and authenticate the payload.
code = 'nobodyknows'  # passphrase used when the private key was exported

with open('my_private_rsa_key.bin', 'rb') as key_file:
    private_key = RSA.import_key(key_file.read(), passphrase=code)

# File layout: RSA-encrypted session key | 16-byte nonce | 16-byte tag | ciphertext
with open('encrypted_data.bin', 'rb') as fobj:
    enc_session_key = fobj.read(private_key.size_in_bytes())
    nonce = fobj.read(16)
    tag = fobj.read(16)
    ciphertext = fobj.read()

cipher_rsa = PKCS1_OAEP.new(private_key)
session_key = cipher_rsa.decrypt(enc_session_key)

cipher_aes = AES.new(session_key, AES.MODE_EAX, nonce=nonce)
# decrypt_and_verify raises ValueError if the tag does not match the data.
data = cipher_aes.decrypt_and_verify(ciphertext, tag)

print(data)
b'blah blah blah Python blah blah'

Cryptography

The cryptography package aims to be “cryptography for humans” much like the requests library is “HTTP for Humans”.

The Fernet module implements an easy-to-use authenticated encryption scheme built on a symmetric encryption algorithm; it guarantees that any message you encrypt with it cannot be manipulated or read without the key you define. The Fernet module also supports key rotation via MultiFernet.

1
2
3
4
5
6
7
8
9
from cryptography.fernet import Fernet
# Round-trip a message through Fernet with a freshly generated key.
cipher_key = Fernet.generate_key()
# Echo the key (first output line below); it changes on every run.
cipher_key
cipher = Fernet(cipher_key)
text = b'My super secret message'
encrypted_text = cipher.encrypt(text)
# Echo the token produced by encrypt() (second output line below).
encrypted_text
decrypted_text = cipher.decrypt(encrypted_text)
# Echo the recovered plaintext (third output line below).
decrypted_text
b'IIW_KsInMPbTR60cyxD8nza5RKv_noP_3_bUeoyrqYY='
b'gAAAAABaq148thkx3uK066S-J59eUdIG8UptOUnKAYVvHGpjjszje5WYX64lICXjgHRbalqFs8RU9V_ovABgO4U-9py6qDwHMAp5Qx4SjyIePb-IFZ0I0y8='
b'My super secret message'

Parse Netease Music

We want to download the comments from this URL, but the request data is encrypted, so we first need to work out how the cipher works.
URL : http://music.163.com/#/song?id=36990266

First Download Core.js and Simulate the requests using fiddler

wyy001

wyy002

wyy003

wyy004

GOT:

1
2
3
4
5
6
7
8
9
var bBU3x = window.asrsea(
JSON.stringify(j7c),
bsL0x(["流泪", "强"]),
bsL0x(TY1x.md),
bsL0x(["爱心", "女孩", "惊恐", "大笑"]));
e7d.data = k7d.cC8u({ params: bBU3x.encText, encSecKey: bBU3x.encSecKey })
}
czW5b(Y7R, e7d)
};
  • j7c : { rid: “R_SO_4_36990266”, offset: “0”, total: “true”, limit: “20”, csrf_token: “” }
  • bsL0x([“流泪”, “强”]) : 010001
  • bsL0x(TY1x.md) : 00e0b509f6259df8642dbc35662901477df22677ec152b5ff68ace615bb7b725152b3ab17a876aea8a5aa76d2e417629ec4ee341f56135fccf695280104e0312ecbda92557c93870114af6c9d05c4f7f0c3685b7a46bee255932575cce10b424d813cfe4875d3e82047b97ddef52741d546b8e289dc6935b3ece0462db0a22b8e7
  • bsL0x([“爱心”, “女孩”, “惊恐”, “大笑”]) : 0CoJUm6Qyw8W8jud

Code

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
import psycopg2

# Create the MUSIC table that stores the downloaded comments.
conn = None
try:
    # Connect inside the try block so that a failed connection is reported
    # like any other database error instead of escaping as a traceback.
    conn = psycopg2.connect(database="demo", user="lysql", password="123456", host="127.0.0.1", port="5432")
    with conn.cursor() as cursor:
        cursor.execute("""CREATE TABLE MUSIC(
ID SERIAL PRIMARY KEY NOT NULL,
TIME BIGINT NOT NULL,
CONTENT TEXT NOT NULL,
LIKEDCOUNT INT NOT NULL,
NICKNAME TEXT NOT NULL,
USERID BIGINT NOT NULL,
AVATARURL TEXT NOT NULL,
AVATARPATH TEXT NOT NULL);
""")
    # Report success only once the DDL has actually been committed.
    conn.commit()
    print('Successful')
except psycopg2.Error as error:
    print(error)
finally:
    print(conn)
    if conn is not None:
        conn.close()
Successful
<connection object at 0x0523B6B8; dsn: 'user=lysql password=xxx dbname=demo host=127.0.0.1 port=5432', closed: 0>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
def save_music(data):
    """Insert one comment record into the ``music`` table.

    Args:
        data: Mapping of column name to value. Keys are lower-cased before
            use, which matches how PostgreSQL folds the unquoted column
            names from the CREATE TABLE statement.

    Database errors are printed rather than raised, so one bad record does
    not stop the crawl.
    """
    # Local import: only this function composes dynamic SQL.
    from psycopg2 import sql

    # Column names are quoted as identifiers and values go through
    # placeholders, so nothing from `data` is spliced into the SQL text.
    statement = sql.SQL('INSERT INTO {table}({keys}) VALUES ({values})').format(
        table=sql.Identifier("music"),
        keys=sql.SQL(', ').join(sql.Identifier(key.lower()) for key in data),
        values=sql.SQL(', ').join(sql.Placeholder() * len(data)),
    )

    conn = None
    try:
        conn = psycopg2.connect(database="demo", user="lysql", password="123456", host="127.0.0.1", port="5432")
        with conn.cursor() as cursor:
            cursor.execute(statement, tuple(data.values()))
        conn.commit()
        print("Records created successfully")
    except psycopg2.Error as error:
        print(error)
    finally:
        if conn is not None:
            conn.close()
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
from Cryptodome.Cipher import AES
from Cryptodome import Random
from Cryptodome.Util.Padding import pad
from hashlib import md5
from base64 import b64encode
import requests
from requests import RequestException
import json
import codecs
import time
import os

# HTTP headers sent with every request to music.163.com.
headers = {
"Accept":"*/*",
"Accept-Encoding":"gzip, deflate",
"Accept-Language":"zh-CN,zh;q=0.9,en;q=0.8",
"Connection":"keep-alive",
"Content-Type":"application/x-www-form-urlencoded",
"Host":"music.163.com",
"Origin":"http://music.163.com",
"Referer":"http://music.163.com/",
"User-Agent":"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36"
}

# Request payload that gets encrypted by get_params(); it mirrors the `j7c`
# object captured from the site. The crawl loop mutates 'offset' and 'total'.
comment = {
"rid": "R_SO_4_36990266",
"offset": "0",
"total":"true",
"limit": "20",
"csrf_token":""
}

# Values recovered from core.js (see the "GOT" list above):
# modulus = bsL0x(TY1x.md), hexadecimal
modulus = '00e0b509f6259df8642dbc35662901477df22677ec152b5ff68ace615bb7b725152b3ab17a876aea8a5aa76d2e417629ec4ee341f56135fccf695280104e0312ecbda92557c93870114af6c9d05c4f7f0c3685b7a46bee255932575cce10b424d813cfe4875d3e82047b97ddef52741d546b8e289dc6935b3ece0462db0a22b8e7'
# nonce = the fourth asrsea() argument; used as the key of the first AES pass
nonce = '0CoJUm6Qyw8W8jud'
# pubKey = the second asrsea() argument; the RSA exponent, hexadecimal
pubKey = '010001'

def AES_encrypt(text, key, iv):
    """Encrypt *text* with AES-CBC and return the ciphertext base64-encoded.

    Args:
        text: Plaintext string; encoded as UTF-8 and padded to the block size.
        key: AES key as a string; encoded as UTF-8.
        iv: Initialisation vector as bytes.

    Returns:
        The base64-encoded ciphertext as bytes.
    """
    encryptor = AES.new(key.encode("utf-8"), AES.MODE_CBC, iv)
    plaintext = pad(text.encode("utf-8"), encryptor.block_size)
    return b64encode(encryptor.encrypt(plaintext))

def RSA_encrypt(text, pubKey, modulus):
    """Textbook (unpadded) RSA encryption of the reversed *text*.

    Args:
        text: Message string; it is reversed, then its UTF-8 bytes are read
            as a big-endian integer.
        pubKey: Public exponent as a hexadecimal string.
        modulus: RSA modulus as a hexadecimal string.

    Returns:
        The ciphertext as lowercase hexadecimal, zero-padded on the left to
        256 characters.
    """
    message = int.from_bytes(text[::-1].encode("utf-8"), "big")
    # Three-argument pow() reduces modulo `modulus` at every step instead of
    # first building the full message ** exponent integer.
    encrypted = pow(message, int(pubKey, 16), int(modulus, 16))
    return format(encrypted, 'x').zfill(256)

def get_params():
    """Build the encrypted form fields for a comment request.

    The module-level `comment` dict is stringified and AES-encrypted twice:
    first under `nonce`, then under a 16-character session key. That session
    key is RSA-encrypted with `pubKey` and `modulus`.

    Returns:
        A dict with 'params' (the doubly encrypted payload as a string) and
        'encSecKey' (the RSA-encrypted session key as hex).
    """
    fixed_iv = b'0102030405060708'
    # The session key is a constant here, so 'encSecKey' is the same on every call.
    session_key = 'F' * 16
    # NOTE(review): str(dict) produces a Python repr (single quotes), not
    # strict JSON like the site's JSON.stringify - confirm the server accepts it.
    inner = AES_encrypt(str(comment), nonce, fixed_iv)
    outer = AES_encrypt(inner.decode(), session_key, fixed_iv)
    return {
        'params': outer.decode(),
        'encSecKey': RSA_encrypt(session_key, pubKey, modulus),
    }
get_params()
{'encSecKey': '257348aecb5e556c066de214e531faadd1c55d814f9be95fd06d6bff9f4c7a41f831f6394d5a3fd2e3881736d94a02ca919d952872e7d0a50ebfa1769a7a62d512f5f1ca21aec60bc3819a9c3ffca5eca9a0dba6d6f7249b06f5965ecfff3695b54e1c28f3f624750ed39e7de08fc8493242e26dbc4484a01c76f739e135637c',
 'params': 'ZNF1o/ZNVZXhOZHkz9GyshKLbI7TVHgmeFB5vQqDSALAON2CiXciAMOFpiU3mnXGZ+t9hYE/7h2wmv7oi2NmPBZ3POk1qtRif3ygZU2BQO3U8kFDasyz/Yk9kkb07D70iUc5rbvwAy62oXeLUCh39aniZ0EYkglrb8VTpUOChwSDkOXP49FnFjcwA8hSZtuR'}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
def get_json(url):
    """POST the encrypted comment request to *url* and return the decoded JSON.

    Returns:
        * the parsed JSON body when the response status is 200;
        * after a redirected non-200 response, the result of one retry: its
          JSON body, its integer status code, or an error string if the
          retry itself fails;
        * None when the first request fails or returns a non-200 status
          without any redirect.
    """
    data = get_params()
    try:
        r = requests.post(url, headers=headers, data=data, allow_redirects=True)
        if r.status_code == 200:
            return r.json()
        # Retry only if the failed request went through at least one redirect.
        if not r.history:
            return None
        print("Request was redirected to ", r.url)
        try:
            rr = requests.post(url, headers=headers, data=data)
            if rr.status_code == 200:
                return rr.json()
            return rr.status_code
        except RequestException:
            return "Redirected, But Some Still thing Wrong!!!"
    except RequestException:
        return None
# json_text = get_json("http://music.163.com/weapi/v1/resource/comments/R_SO_4_36990266?csrf_token=")
# json_text
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
def save_avatar(url, nickname):
    """Download an avatar image into ``avatars/`` and return its local path.

    The file is named after the MD5 hex digest of the image bytes, so the
    same image is stored only once however many users share it.

    Args:
        url: Address of the avatar image.
        nickname: Unused; kept so existing callers keep working.

    Returns:
        The path 'avatars/<md5>.jpg', or None if the download fails or the
        response status is not 200.
    """
    # exist_ok avoids the race between checking for the directory and creating it.
    os.makedirs("avatars", exist_ok=True)
    try:
        response = requests.get(url)
    except requests.RequestException:
        # Best effort: any request failure means this avatar is skipped.
        print('Failed to Save Image')
        return None
    if response.status_code != 200:
        return None

    file_path = '{0}/{1}.{2}'.format("avatars", md5(response.content).hexdigest(), 'jpg')
    if os.path.exists(file_path):
        print('Already Downloaded', file_path)
    else:
        with open(file_path, 'wb') as f:
            f.write(response.content)
    return file_path
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
def parse_page(json, page):
    """Yield one flat record per comment in a decoded API response.

    Args:
        json: Decoded response dict (this parameter shadows the ``json``
            module inside the function). A falsy value yields nothing.
        page: 1-based page number. On page 1 the hot comments are placed
            in front of the regular ones when 'moreHot' is set.

    Yields:
        Dicts with the keys 'content', 'likedCount', 'time', 'nickname',
        'avatarurl', 'avatarpath' and 'userId'. The avatar is downloaded
        through save_avatar() as a side effect.
    """
    if not json:
        return
    # A missing or null list is treated as empty instead of raising TypeError.
    comments = json.get('comments') or []
    if page == 1 and json.get('moreHot'):
        comments = (json.get('hotComments') or []) + comments

    for entry in comments:
        user = entry.get('user')
        avatar_url = user.get('avatarUrl')
        nickname = user.get('nickname')
        file_path = save_avatar(avatar_url, nickname)
        print(file_path)
        yield {
            'content': entry.get('content'),
            'likedCount': entry.get('likedCount'),
            'time': entry.get('time'),
            'nickname': nickname,
            'avatarurl': avatar_url,
            'avatarpath': file_path,
            'userId': user.get('userId'),
        }

## TEST
# for result in parse_page(json_text):
# print(result)
# json_text.get('total')
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
# Crawl comment pages 40..999, 20 comments per page, and store every record.
for page in range(40, 1000):
    # Set the paging fields BEFORE the request, so that the page fetched is
    # the page being reported and parsed.
    comment['offset'] = str(page*20-20)
    if page > 1:
        comment['total'] = "false"

    JSON = None  # keeps the error handler below from hitting an unbound name
    try:
        JSON = get_json("http://music.163.com/weapi/v1/resource/comments/R_SO_4_36990266?csrf_token=")
        if JSON.get('more'):
            print("PAGE : ", page)
            for result in parse_page(JSON, page):
                print(result)
                save_music(result)
    except AttributeError:
        # get_json() may return None, a status code or an error string,
        # none of which has .get(); show what came back and move on.
        print(JSON)
    # Pause after every request, failed ones included, to avoid hammering the server.
    time.sleep(5)

Finally my spider was detected, but I still got enough information.
(‘Connection aborted.’, ConnectionResetError(10054, ‘远程主机强迫关闭了一个现有的连接。’, None, 10054, None))

netease

REFERENCES