文件系统增量备份 -- Python脚本


对于数据备份,我一向比较小心,一般会定时进行相关备份。但即便如此,也吃过亏:一些存放于虚拟机里的资料没有及时备份,有一次虚拟机故障导致文件丢失,损失惨重。究其原因,主要是文件太多,完全备份太麻烦,而手工的增量备份肯定会存在漏网之鱼

基于此,前段时间刚好不太忙,花了将近一天时间,开发了一个针对文件系统的自动增量备份脚本

该脚本的主要特点是:

1. 有不错的效率。我测试了自己电脑上的某个目录,该目录总共53G的文件(文件数量27万多个,文件夹数量1万7千多个),增量更新查询用时3分钟。这里只提供查询的时间(不含备份),因为备份用时取决于待更新的文件数量,无法事先评估,所以没办法做比较

2. 可以设置不备份的路径,通过指定IgnorePath.txt的内容即可

3. 完善的日志提示

脚本代码如下

# -*- coding:utf-8 -*-

"""
@Author: Jaymi
@Date:   20190906
@Notes:  The log output contains Chinese text, and on some (Linux) systems
         sys.stdout.encoding does not support Chinese.
         To check the encoding:  import sys; print(sys.stdout.encoding)
         In that case the messages printed to standard output may be garbled;
         run the script as shown below instead:
         PYTHONIOENCODING=gbk python3 backup_fb.py
"""

import os
import datetime
import sys
import logging
import shutil


# Source directory -- expected to hold the newer / more complete files.
path_a = '/lib'

# Destination directory -- the backup of the source; expected to be older
# and possibly incomplete.
path_b = '/home/lib_1'

# Moment the run started; used for the elapsed-time report at the end.
now_begin = datetime.datetime.now()

# Source paths that must not be processed.
ignorepath = list()

# Destination directories that should be descended into, seeded with path_b.
recursion_path = [path_b]

# Paths whose copy failed, one list per failure category:
# encoding errors, permission errors, and everything else.
unicode_error, permission_error, other_error = [], [], []

# newfile_count: files/folders found that need copying.
# file_count:    files/folders actually copied successfully.
newfile_count = file_count = 0

# Log to both backup.log (UTF-8) and the console with a timestamp prefix.
DATE_FORMAT = "%Y%m%d %H:%M:%S"
LOG_FORMAT = "%(asctime)s %(message)s"
fp = logging.FileHandler('backup.log', encoding='utf-8')
fs = logging.StreamHandler()
logging.basicConfig(
    handlers=[fp, fs],
    level=logging.DEBUG,
    format=LOG_FORMAT,
    datefmt=DATE_FORMAT,
)
logging.info("".join(("从 ", path_a, " 往 ", path_b, " 做增量备份更新开始...")))

def is_path_exists(path1):
    """Return True when path1 exists and is not a regular file.

    Anything that exists but is not a regular file (typically a directory)
    counts as an existing path; a missing entry or a regular file yields
    False.
    """
    return os.path.exists(path1) and not os.path.isfile(path1)
        
def is_file_exists(path1):
    """Return True when path1 exists and is a regular file, else False."""
    # os.path.isfile already reports False for a missing entry, so the
    # separate existence check is folded into it.
    return os.path.isfile(path1)

# Abort before doing any work when either configured directory is missing.
# A non-zero exit status is used so that a calling shell script or scheduler
# can tell that the backup did not run.
if not is_path_exists(path_a):
    logging.info("%s%s" % ("源路径不存在:",path_a))
    sys.exit(1)

if not is_path_exists(path_b):
    logging.info("%s%s" % ("目的路径不存在:",path_b))
    sys.exit(1)
    
def copy_tree(pathname_a,pathname_b):
    """Copy the directory pathname_a to pathname_b with shutil.copytree.

    A failure is never propagated: it is logged and pathname_a is appended
    to unicode_error, permission_error or other_error depending on the
    exception type. On success the global file_count is incremented.
    """
    global file_count
    try:
        shutil.copytree(pathname_a,pathname_b)
    except Exception as err:
        # Pick the log prefix and the failure list from the exception type,
        # checking the specific types before the catch-all.
        if isinstance(err, UnicodeDecodeError):
            prefix, failed = "复制文件夹失败(UnicodeDecodeError): ", unicode_error
        elif isinstance(err, PermissionError):
            prefix, failed = "复制文件夹失败(PermissionError): ", permission_error
        else:
            prefix, failed = "复制文件夹失败(其他Exception): ", other_error
        logging.info("%s%s" % (prefix, pathname_a))
        failed.append(pathname_a)
        return
    file_count += 1
       
def copy_file(pathname_a,pathname_b):
    """Copy the file pathname_a to pathname_b with shutil.copy.

    A failure is never propagated: it is logged and pathname_a is appended
    to unicode_error, permission_error or other_error depending on the
    exception type. On success the global file_count is incremented.
    """
    global file_count
    try:
        shutil.copy(pathname_a,pathname_b)
    except UnicodeDecodeError:
        logging.info("%s%s" % ("复制文件失败(UnicodeDecodeError): ",pathname_a))
        unicode_error.append(pathname_a)
    except PermissionError:
        logging.info("%s%s" % ("复制文件失败(PermissionError): ",pathname_a))
        permission_error.append(pathname_a)
    except Exception:
        # This branch handles everything that is NOT a permission problem, so
        # it is labelled "其他Exception" (as copy_tree does) rather than
        # "PermissionError", which made the log contradict the final report.
        logging.info("%s%s" % ("复制文件失败(其他Exception): ",pathname_a))
        other_error.append(pathname_a)
    else:
        file_count = file_count + 1

# Load the directories that must not be backed up: IgnorePath.txt (GBK
# encoded) holds one path per line; the newline is removed from each entry.
with open('IgnorePath.txt',encoding='gbk') as f:
    ignorepath.extend(text_line.replace("\n","") for text_line in f)

# Debugging aid.
def debug_fuc(source ,txt_now, tag):
    """Log tag (prefixed with "debug_info_fuc: ") when source equals txt_now."""
    if source != txt_now:
        return
    message = "%s%s" % ("debug_info_fuc: ", tag)
    logging.info(message)

# Separator characters that may sit between path_a and the relative part of a
# walked path (os.altsep is None on POSIX).
_SEPARATORS = os.sep + (os.altsep or "")


def _to_backup_path(source_path):
    """Map a path located under path_a to its counterpart under path_b.

    Only the leading path_a prefix is swapped. A plain str.replace would also
    rewrite later occurrences of path_a inside the path (for example
    '/lib/x/libfoo' when path_a is '/lib') and produce a wrong destination.
    """
    relative = source_path[len(path_a):].lstrip(_SEPARATORS)
    return os.path.join(path_b, relative)


# Set copy of the ignore list so each membership test is O(1).
_ignored_dirs = set(ignorepath)

# Walk the source tree top-down. For every directory visited:
#   * sub-directories listed in the ignore file are skipped entirely;
#   * sub-directories missing from the backup are copied as a whole tree;
#   * sub-directories already present in the backup are descended into;
#   * files are copied when missing from the backup or newer in the source.
for root, dirs, files in os.walk(path_a):

    # Names of the sub-directories os.walk should still descend into.
    descend = []

    for dir_name in dirs:
        pathname_a = os.path.join(root, dir_name)

        if pathname_a in _ignored_dirs:
            continue

        pathname_b = _to_backup_path(pathname_a)

        if is_path_exists(pathname_b):
            descend.append(dir_name)
            continue

        # Missing in the backup: copy the whole tree in one go. There is no
        # need to walk it afterwards, so it is not added to `descend`.
        logging.info("%s%s%s%s" % ("开始复制文件夹:",pathname_a," --> ",pathname_b))
        newfile_count = newfile_count + 1
        copy_tree(pathname_a, pathname_b)

    # Pruning `dirs` in place tells os.walk (top-down mode) not to enter the
    # ignored and freshly copied directories. This replaces the previous
    # parent-path bookkeeping, which split on a hard-coded "\\" and therefore
    # skipped every existing sub-directory on POSIX systems.
    dirs[:] = descend

    for file_name in files:
        filename_a = os.path.join(root, file_name)
        filename_b = _to_backup_path(filename_a)

        if is_file_exists(filename_b):
            try:
                outdated = os.path.getmtime(filename_a) > os.path.getmtime(filename_b)
            except OSError:
                # e.g. a broken symlink in the source: record it and keep
                # going instead of aborting the whole backup run.
                logging.info("%s%s" % ("获取文件修改时间失败: ",filename_a))
                other_error.append(filename_a)
                continue
            if not outdated:
                continue

        # Either missing from the backup or older than the source file.
        logging.info("%s%s%s%s" % ("开始复制文件:",filename_a," --> ",filename_b))
        newfile_count = newfile_count + 1
        copy_file(filename_a, filename_b)

# Report every path whose copy failed, grouped by failure category.
# Iterating an empty list does nothing, so no length check is needed.
for filepath in unicode_error:
    logging.info("%s%s" % ("存在因编码错误复制失败的文件(夹):",filepath))

for filepath in permission_error:
    logging.info("%s%s" % ("存在因权限错误复制失败的文件(夹):",filepath))

for filepath in other_error:
    logging.info("%s%s" % ("存在因其他错误复制失败的文件(夹):",filepath))

# Timestamp taken when the run ends.
now_end = datetime.datetime.now()

logging.info("增量备份更新完成...")

logging.info("%s%s%s%s%s" % ("发现待更新文件(夹) ",newfile_count," 个,更新成功文件(夹) ",int(file_count)," 个..."))

# timedelta.seconds only holds the sub-day remainder (0..86399);
# total_seconds() covers the entire elapsed span, including whole days.
logging.info("%s%s%s" % ("总用时 ",int((now_end-now_begin).total_seconds())," 秒..."))

logging.info("%s%s%s%s%s" % ("从 ",path_a," 往 ",path_b," 做增量备份更新结束..."))

部分结果截图

第一次使用该脚本来增量备份大型完整的文件系统时,可能会因为权限问题、路径异常等情况,导致某些文件或者文件夹复制失败

此时需要手工处理一些特殊的文件或者文件夹,比如不备份路径过长的目录、手工备份特殊的文件等。经过一次处理后,再次使用此脚本便可顺利流畅地进行增量备份

暂无评论

注册用户登录后才能发表或者回复评论,请先登录 注册。