在Windows中安装MinIO
进入网址
在Windows安装时,选择相应的exe文件下载,下载到本地后,使用如下的命令即可在前台启动:
minio.exe server D:\your_path
或者将该路径写进环境变量的path中,用来为minio指定数据存储路径。
执行成功的效果如下图所示:
在python中使用Minio
python中安装minio
包
pip install minio
在prompt shell
启动minio,注意该shell
不能关闭
minio.exe server D:\MinIO # 后面的路径是存放数据的地址
浏览器打开127.0.0.1:9000
输入默认账户和密码minioadmin
, minioadmin
获取bucket中的对象
通过list_objects
获取Minio
中的对象
# List the bucket's contents; recursive=True is passed so the listing is
# not limited to the top level.
objects = client.list_objects(bucket_name, prefix=None, recursive=True)
for obj in objects:
    # Collect the attributes of each listed object, then print them on one line.
    fields = (
        obj.bucket_name,
        obj.object_name.encode('utf-8'),
        obj.last_modified,
        obj.etag,
        obj.size,
        obj.content_type,
    )
    print(*fields)
上传本地文件到Minio
通过fput_object
方法可以将本地文件上传到Minio
# Upload a local file.
#   bucket_name: name of the target bucket
#   object_name: full path of the object inside the bucket
#   file_path:   full path of the file on the local disk
remote_name = 'data1/' + file_name
local_path = file_path + "/" + file_name
result = client.fput_object(
    bucket_name=bucket_name,
    object_name=remote_name,
    file_path=local_path,
)
print(result.object_name, result.bucket_name, result.etag)
从Minio下载图片到本地
通过fget_object
方法可以将文件从Minio
下载到本地
# Download one object from the 'test' bucket and save it locally as this.jpeg.
client.fget_object(
    bucket_name='test',
    object_name='baike_knowledge/46c88e9e080efc4c0e8742e022130d7e/20231109112809770.jpeg',
    file_path='this.jpeg',
)
爬取图片url,并上传到Minio
使用requests
库爬取网络图片,并通过自定义的
upload_pictures()
函数(内部调用Minio的put_object方法)将数据上传到MinIO
上
import requests
from io import BytesIO
from minio import Minio
from minio.error import S3Error
from hashlib import md5
import datetime

# Client for a local MinIO server using the default credentials, over plain HTTP.
minio = Minio(endpoint="127.0.0.1:9000", access_key='minioadmin', secret_key='minioadmin', secure=False)


def upload_pictures(client, source_url, pic_url, bucket_name, spider_name, timeout=10):
    """Download a picture from ``pic_url`` and store it in MinIO.

    The object is stored as ``{spider_name}/{md5(source_url)}/{timestamp}.{ext}``,
    where ``ext`` is the text after the last dot of ``pic_url`` (query string
    and fragment removed) and ``timestamp`` has millisecond resolution.

    Args:
        client: MinIO client exposing ``put_object``.
        source_url: URL of the page the picture came from; its MD5 hex digest
            becomes one path component of the object name.
        pic_url: URL of the picture to download.
        bucket_name: Bucket to upload into.
        spider_name: First path component of the object name.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        ``"{bucket_name}/{object_name}"`` on success, or ``None`` if the
        download or the upload failed (the error is printed, not raised).
    """
    try:
        # A timeout keeps a stalled server from blocking the crawler forever.
        response = requests.get(pic_url, timeout=timeout)
        # Without this, 4xx/5xx error pages would be uploaded as "images" and
        # the HTTPError handler below could never trigger.
        response.raise_for_status()
        payload = response.content

        # md5(source_url) groups all pictures of one page under one prefix.
        page_md5 = md5(str(source_url).encode("utf-8")).hexdigest()
        # Timestamp truncated from microseconds to milliseconds.
        timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S%f")[:-3]
        # Picture type (jpg, png, jpeg, ...) taken from the URL; drop any
        # "?query" or "#fragment" so they do not leak into the object name.
        extension = pic_url.split('#')[0].split('?')[0].split('.')[-1]

        object_name = f"{spider_name}/{page_md5}/{timestamp}.{extension}"
        client.put_object(
            bucket_name=bucket_name,
            object_name=object_name,
            data=BytesIO(payload),
            content_type=f"image/{extension}",
            length=len(payload),
        )
        # NOTE: the returned path is prefixed with the bucket name; strip that
        # prefix when using it as an object name to read the data back.
        pic_path = f'{bucket_name}/{object_name}'
        # logging.info(f'save picture to minio path {pic_path}')
        return pic_path
    except requests.exceptions.InvalidURL:
        print("无效的 URL")
    except requests.exceptions.HTTPError as e:
        print(f"HTTP 错误: {e}")
    except S3Error as err:
        print("Error occurred: ", err)
    except Exception as e:
        print(f"发生错误: {e}")
    return None
移除MinIO中size为0的object
from minio import Minio

# Client for a local MinIO server using the default credentials, over plain HTTP.
minio = Minio(endpoint="127.0.0.1:9000", access_key='minioadmin', secret_key='minioadmin', secure=False)


def remove_zero_size(client, bucket_name):
    """Delete every object in ``bucket_name`` whose size is 0.

    Each listed object's attributes are printed before the size check.

    Args:
        client: MinIO client exposing ``list_objects`` and ``remove_object``.
        bucket_name: Bucket to scan.
    """
    # Named "listing" rather than "object" to avoid shadowing the builtin.
    listing = client.list_objects(bucket_name, prefix=None, recursive=True)
    for obj in listing:
        print(obj.bucket_name, obj.object_name.encode('utf-8'), obj.last_modified,
              obj.etag, obj.size, obj.content_type)
        if obj.size == 0:
            # Pass the object name as str, exactly as the listing returned it;
            # the SDK documents object_name as a string, not bytes.
            client.remove_object(bucket_name, obj.object_name)


remove_zero_size(minio, bucket_name='size')
函数说明 https://www.bookstack.cn/read/miniocookbookzh/24.md
https://blog.csdn.net/weixin_40547993/article/details/110682587
https://blog.csdn.net/quyingzhe0217/article/details/129727120
https://blog.csdn.net/Deaohst/article/details/128699370
https://www.python100.com/html/Q2I2IO5C25Z5.html
https://www.cnblogs.com/mian-1122/p/17463849.html
安装minio https://zhuanlan.zhihu.com/p/514794125
https://blog.csdn.net/m0_72838865/article/details/126599849
https://blog.csdn.net/LONG729564606/article/details/129331498
https://blog.csdn.net/feritylamb/article/details/126246293