1. 新建文件夹
# Create the directory (including missing parents) if it does not exist yet.
# exist_ok=True avoids the race between a separate exists() check and the
# creation itself; the call is a no-op when the directory is already there.
os.makedirs(feature_dir, exist_ok=True)
2. 后台运行并保存log
# Run test.py in the background so it keeps running after the terminal closes.
#   nohup            ignore the hangup signal sent when the shell exits
#   python -u        unbuffered output, so lines reach the log immediately
#   > test.log 2>&1  send both stdout and stderr to test.log
#   &                put the job in the background
nohup python -u test.py > test.log 2>&1 &

# Follow the log as it grows (Ctrl-C to stop watching).
tail -f test.log
# Print the whole log once.
cat test.log
3. 文件读取
# Read every line of a UTF-8 text file into a list (line endings are kept).
with open(r'./data/user_dict.txt', 'r', encoding='utf-8') as f:
    data = f.readlines()

# Append one entry on a new line at the end of the file.
with open(r'./data/user_dict.txt', 'a', encoding='utf-8') as f:
    t = '你好'
    # '\n' is a real newline; '\\n' would write a backslash followed by "n".
    f.write('\n' + t)

# Peek at the first 10 lines of a file without loading all of it.
from itertools import islice

with open('./data/train.tsv', encoding='utf-8') as file:
    # islice stops after 10 lines, or earlier if the file is shorter.
    for line in islice(file, 10):
        print(line)

x = {"key": "value"}  # placeholder: any JSON-serializable object

# ensure_ascii=False writes non-ASCII characters as-is, so open the file with
# an explicit UTF-8 encoding instead of relying on the platform default.
with open(r"./x.json", 'w', encoding='utf-8') as f:
    json.dump(x, f, ensure_ascii=False)
print('done')

# Same as above, pretty-printed with a 4-space indent.
with open(r"result.json", 'w', encoding='utf-8') as f:
    json.dump(res, f, ensure_ascii=False, indent=4)

# Option 1: read the text, then parse it. Read the whole file rather than only
# its first line, so pretty-printed (multi-line) JSON parses as well.
with open(r"./x.json", 'r', encoding='utf-8') as f:
    x = json.loads(f.read())

# Option 2: let json parse straight from the file object.
with open(r"./x.json", 'r', encoding='utf-8') as f:
    x = json.load(f)

# Wrap the value in a list and store it in NumPy's binary .npy format.
x = [x,]
np.save("./././x.npy", x)
# NOTE(review): if x holds arbitrary Python objects (e.g. a dict), the array is
# pickled on save and np.load then requires allow_pickle=True -- enable that
# only for files you trust. TODO confirm what x contains here.
x = np.load(r"./././x.npy")

# Read one worksheet of an Excel workbook into a DataFrame.
data = pd.read_excel(r'xxxx.xlsx', sheet_name='Sheet1')

# Turn a dict into a two-column DataFrame, one row per key/value pair.
result = {'x': 1, 'y': 2}  # placeholder data
df = pd.DataFrame(list(result.items()), columns=['key', 'value'])

# Write the CSV with an explicit encoding ...
df.to_csv(r"./result.csv", index=False, header=True, encoding='utf-8')

# ... and read it back with the same one. Use encoding='gbk' only for files
# that really were saved as GBK; a mismatch garbles non-ASCII text.
df = pd.read_csv(r'./result.csv', encoding='utf-8')
4. 字符串判断
s.islower()  # True if s has at least one cased character and all cased characters are lowercase
s.isupper()  # True if s has at least one cased character and all cased characters are uppercase
s.isalpha()  # True if s is non-empty and every character is a letter
s.isalnum()  # True if s is non-empty and every character is a letter or a digit
s.isdigit()  # True if s is non-empty and every character is a digit
s.istitle()  # True if s is titlecased: each word starts uppercase, the rest lowercase
5. 统计list元素出现次数
from collections import Counter

x = [1, 2, 3, 2]
y = '1232'
Counter(x)
# >> Counter({2: 2, 1: 1, 3: 1})   (Counter is a dict subclass)
Counter(y)
# >> Counter({'2': 2, '1': 1, '3': 1})
Counter('1232')['2']
# >> 2
6. timestamp 与标准时间互相转换
# Time handling, e.g. to work out login times.
import time


def timestamp_datetime(value, fmt='%Y-%m-%d %H:%M:%S'):
    """Format a Unix timestamp as a local-time string.

    Args:
        value: Seconds since the epoch (int or float), e.g. 1332888820.
        fmt: ``time.strftime`` format string; defaults to
            ``'%Y-%m-%d %H:%M:%S'``.

    Returns:
        The timestamp rendered in the machine's local time zone.
    """
    # localtime() turns the timestamp into a struct_time, e.g.
    # time.struct_time(tm_year=2012, tm_mon=3, tm_mday=28, tm_hour=6, ...)
    local_struct = time.localtime(value)
    # strftime() then renders that struct_time with the requested format.
    return time.strftime(fmt, local_struct)
def datetime_timestamp(dt, fmt='%Y-%m-%d %H:%M:%S'):
    """Convert a local-time string to an integer Unix timestamp.

    Args:
        dt: Date/time string, e.g. ``"2012-03-28 06:53:40"``.
        fmt: ``time.strptime`` format describing ``dt``; defaults to
            ``'%Y-%m-%d %H:%M:%S'``.

    Returns:
        Seconds since the epoch as an int, interpreting ``dt`` in the
        machine's local time zone.

    Raises:
        ValueError: If ``dt`` does not match ``fmt``.
    """
    # Parse the string into a struct_time first, e.g.
    # time.struct_time(tm_year=2012, tm_mon=3, tm_mday=28, tm_hour=6, ...)
    parsed = time.strptime(dt, fmt)
    # mktime() interprets the struct_time as local time.
    return int(time.mktime(parsed))

# Example usage; both results depend on the machine's local time zone.
d = datetime_timestamp('2015-03-30 16:38:20')
print(d)  # 1427704700 when the local time zone is UTC+8
s = timestamp_datetime(1427704700)
print(s)  # 2015-03-30 16:38:20 when the local time zone is UTC+8
7. 排序
# Option 1: list.sort() sorts the list in place and does not return a copy.
# Option 2: the built-in sorted() (available since Python 2.4) returns a new
#           sorted list and leaves the input unchanged.
listX = [[1, 4], [2, 5], [3, 3]]
sorted(listX, key=lambda x: x[1])  # order by the second element of each pair
# >> [[3, 3], [1, 4], [2, 5]]

### Sort two lists in the same order
list1 = [1, 2, 3, 4, 15, 6]
list2 = ['a', 'b', 'c', 'd', 'e', 'f']
# Pair the elements up, sort the pairs in descending order, then unzip them.
c = sorted(zip(list1, list2), reverse=True)
# Slice assignment updates both lists in place, so they remain lists.
list1[:], list2[:] = zip(*c)
print(list1, list2)
8. 文件路径获取
# Current working directory of the running process.
path1 = os.getcwd()
# Directory that contains this source file, with symlinks resolved.
path2 = os.path.dirname(os.path.realpath(__file__))
9. 同一行刷新打印
# General pattern: "\r" (a real carriage return, not the two characters "\\r")
# moves the cursor back to the start of the line, end="" suppresses the
# newline, and flush=True shows the text immediately. `object` stands for
# whatever you want to display.
print("\r", object, end="", flush=True)

# Example: overwrite a single progress counter while iterating.
for i, img_name in enumerate(img_names):
    print(f"\r{i}/{len(img_names)}", end="", flush=True)
10. PIL resize比opencv更清晰
img = cv2.imread("000000000113_0.jpg")
# cv2.imread returns None instead of raising when the file cannot be read.
if img is None:
    raise FileNotFoundError("000000000113_0.jpg")
img = Image.fromarray(img)    # wrap the ndarray as a PIL image (channel order is left as-is)
img = img.resize((192, 192))  # resize with PIL's default resampling filter
img = np.array(img)           # back to a NumPy array for further OpenCV use
11. base64 与 opencv 图像互相转换
def imgToBase64(img_array):
    """Encode an RGB image array as a base64 string of JPEG data.

    Args:
        img_array: Image as a NumPy array in RGB channel order.

    Returns:
        ASCII ``str`` holding the base64-encoded JPEG bytes.

    Raises:
        ValueError: If OpenCV reports that JPEG encoding failed.
    """
    # OpenCV encodes from BGR channel order, so convert from RGB first.
    bgr = cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR)
    # imencode returns (success flag, 1-D uint8 buffer of the compressed file).
    ok, encoded = cv2.imencode(".jpg", bgr)
    if not ok:
        raise ValueError("cv2.imencode failed to encode the image as JPEG")
    return base64.b64encode(encoded.tobytes()).decode("ascii")

def base64ToImg(base64_str):
    """Decode a base64-encoded image into an RGB NumPy array.

    Args:
        base64_str: Base64 text (or bytes) of an encoded image file, such as
            the JPEG data produced from an RGB image.

    Returns:
        Three-channel image array in RGB channel order.

    Raises:
        ValueError: If the payload is empty or is not a decodable image.
    """
    byte_data = base64.b64decode(base64_str)
    # View the raw bytes as a 1-D uint8 array without copying.
    buf = np.frombuffer(byte_data, dtype=np.uint8)
    if buf.size == 0:
        raise ValueError("base64 payload is empty")
    # imdecode yields a 3-channel BGR image, or None if the data is invalid.
    bgr = cv2.imdecode(buf, cv2.IMREAD_COLOR)
    if bgr is None:
        raise ValueError("base64 payload is not a decodable image")
    return cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
评论
查看更多