5fe01113a56026439.jpg_fo742.jpg

备份wordpress图片到七牛云


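在wordpress数据库中创建一张表imgBak(下面的建表语句中表名为小写的imgbak;若MySQL区分表名大小写,需与脚本中使用的imgBak保持一致):

| 字段 | img | imgBak | md5 |
| --- | --- | --- | --- |
| 类型 | varchar(255) | varchar(255) | varchar(255),主键 |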
-- Use utf8mb4 as the character set for this client session.
SET NAMES utf8mb4;
-- Turn foreign-key checks off while the table is dropped and recreated.
SET FOREIGN_KEY_CHECKS = 0;

-- ----------------------------
-- Table structure for imgbak
-- One row per image URL handled by the backup script; `md5` is the primary key.
-- ----------------------------
-- WARNING: this drops the table first, discarding every row already recorded in it.
DROP TABLE IF EXISTS `imgbak`;
CREATE TABLE `imgbak`  (
  -- Original image URL as found in the post content.
  `img` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
  -- URL of the copy stored on Qiniu.
  `imgBak` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,
  -- MD5 hex digest of the original URL; used by the script to detect URLs it has already handled.
  `md5` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,
  PRIMARY KEY (`md5`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8 COLLATE = utf8_general_ci ROW_FORMAT = Dynamic;

-- Restore foreign-key checks.
SET FOREIGN_KEY_CHECKS = 1;

单py实现:

# -*- coding: utf-8 -*-
# flake8: noqa
from DBUtils.PooledDB import PooledDB
import MySQLdb
from qiniu import Auth, put_file, BucketManager
from datetime import datetime, date, timedelta
import time
import sys
import os
import re
import hashlib

# Qiniu API key pair (masked placeholders in this listing).
# NOTE(review): secrets are hard-coded in source; consider loading them from
# the environment or a config file kept out of version control.
access_key = '****-euT'
secret_key = '****'
# Auth object built from the key pair, and the bucket manager that
# fetch_url() uses to copy remote files into the bucket.
q = Auth(access_key, secret_key)
bucket = BucketManager(q)
# Target bucket, and the public URL prefix fetch_url() prepends to object keys.
bucket_name = 'img-base11111'
bucket_domain = 'https://cdn.xxx.xxx/'
# Connection pool for the WordPress database; passed to queryDB().
# NOTE(review): the positional 5 is presumably PooledDB's `mincached`
# argument -- confirm against the installed DBUtils version. The database
# password is also hard-coded here.
pool_wordpress = PooledDB(MySQLdb, 5, host='192.168.2.222', user='wordpress', passwd='123456.', db='wordpress', port=3306)

def md5(str):
    m = hashlib.md5()
    m.update(str.encode("utf8"))
    print(m.hexdigest())
    return m.hexdigest()

def md5GBK(str1):
    m = hashlib.md5(str1.encode(encoding='gb2312'))
    print(m.hexdigest())

def queryDB(pool, SQL, args=None):
    """Run one SQL statement on a pooled connection and return its rows.

    Args:
        pool: object exposing ``connection()`` that returns a DB-API
            connection (here a DBUtils ``PooledDB``).
        SQL: the statement to execute.
        args: optional parameters for the statement. When given they are
            passed to ``cursor.execute`` so the driver performs the
            escaping; when omitted the statement is executed as-is.

    Returns:
        A list of the fetched rows, or ``None`` if anything failed. The
        failure is reported on stdout rather than raised, so callers must
        check for ``None``.
    """
    conn = None
    cur = None
    try:
        conn = pool.connection()
        cur = conn.cursor()
        if args is None:
            cur.execute(SQL)
        else:
            cur.execute(SQL, args)
        rows = cur.fetchall()
        conn.commit()
        return list(rows)
    except Exception as e:
        # Best-effort contract: report the failure and hand back None.
        print('mysql pool error: %s' % (e,))
        return None
    finally:
        # Always release the cursor and give the connection back to the
        # pool, including on the error path (previously leaked there).
        for resource in (cur, conn):
            if resource is None:
                continue
            try:
                resource.close()
            except Exception:
                # A failing close must not mask the real result or error.
                pass

def fetch_url(url_old):
    """Ask Qiniu to copy the remote file at ``url_old`` into the bucket.

    The object key is ``<YYYYmmddHHMMSS>/<file name from the URL>``, using
    the local time of the call.

    Args:
        url_old: source URL of the file to copy.

    Returns:
        ``bucket_domain`` + key, i.e. the public URL of the copy. The URL is
        returned even if the fetch failed; a failure is reported on stdout.
    """
    # The last path component of the URL becomes the object's file name.
    filename = os.path.basename(url_old)
    key = time.strftime('%Y%m%d%H%M%S') + "/" + filename
    # No upload token is needed: the fetch is performed by the bucket
    # manager, which already carries the credentials.
    ret, info = bucket.fetch(url_old, bucket_name, key)
    if ret is None:
        # NOTE(review): assumes the SDK returns ret=None on failure -- confirm.
        print('qiniu fetch failed for %s: %s' % (url_old, info))
    return bucket_domain + key

def trans_url(html):
    """Back up every not-yet-recorded image URL found in ``html`` to Qiniu.

    ``html`` is converted with ``str()`` and scanned for ``https://`` URLs.
    For each URL that ends in a supported image extension and is not already
    served from the CDN, the MD5 of the URL is looked up in ``imgBak``; if it
    is absent the image is fetched into the bucket and a row
    (original URL, backup URL, md5) is inserted.

    Args:
        html: post content, or any object whose ``str()`` contains it.
    """
    image_extensions = ('.jpg', '.png', '.gif', '.bmp')
    # NOTE(review): the character class has no '-' or '_', so a URL containing
    # them is cut short at that character and usually fails the extension test.
    results = re.findall(r"(?isu)(https\://[a-zA-Z0-9\.\?/&\=\:]+)", str(html))
    for url_old in results:
        extension = os.path.splitext(url_old)[1]
        if extension.lower() not in image_extensions:
            # Only the listed image formats are converted.
            continue
        if 'https://cdn.sre.ink' in url_old or url_old.startswith(bucket_domain):
            # Already served from the CDN; do not convert again.
            continue
        url_old_md5 = md5(url_old)
        # The digest is a string and must be quoted; unquoted, the statement
        # is invalid and the lookup always fails.
        sql_check_md5 = 'select md5 from imgBak where md5="%s"' % url_old_md5
        md5_check_result = queryDB(pool_wordpress, sql_check_md5)
        if md5_check_result is None:
            # Lookup failed: skip rather than upload a copy that cannot be
            # recorded and would be uploaded again on the next run.
            continue
        if md5_check_result:
            # A row with this md5 exists: the URL has been backed up already.
            continue
        url_qiniu = fetch_url(url_old)
        # The values contain no quote characters: the URL regex above only
        # admits [a-zA-Z0-9.?/&=:] and the md5 is hexadecimal.
        sql_bak_url = 'insert into imgBak (img,imgBak,md5) values ("%s","%s","%s");' % (
            url_old, url_qiniu, url_old_md5)
        queryDB(pool_wordpress, sql_bak_url)

def bak_wordpress_url(days_to_bak):
    """Back up images from published posts modified in the last N days.

    Args:
        days_to_bak: how many days back from today to look, compared against
            ``wp_posts.post_modified``.
    """
    cutoff = (date.today() - timedelta(days=days_to_bak)).strftime("%Y-%m-%d")
    sql_content = "SELECT post_content FROM wp_posts WHERE post_modified > '%s' AND post_status = 'publish' AND post_type = 'post';" % (
        cutoff)
    last_post = queryDB(pool_wordpress, sql_content)
    if not last_post:
        # queryDB returns None on failure (already reported) and an empty
        # list when no post matches; either way there is nothing to do.
        return
    for html in last_post:
        trans_url(html)

if __name__ == '__main__':
    # reload() and sys.setdefaultencoding() exist only on Python 2, where
    # they make implicit str/unicode conversions use UTF-8. On Python 3 they
    # are neither available nor needed, so skip them there.
    if sys.version_info[0] < 3:
        reload(sys)
        sys.setdefaultencoding("utf-8")
    # Process posts modified during the last 15 days.
    bak_wordpress_url(15)

加入定时任务:

chmod +x /opt/bak_wordpress_img.py
dos2unix /opt/bak_wordpress_img.py
#加入crontab,每周备份一次(上面的脚本首行没有 #! 解释器声明,直接执行会失败,因此这里显式用python运行):
@weekly python /opt/bak_wordpress_img.py

发表回复

您的电子邮箱地址不会被公开。 必填项已用*标注

Captcha Code