"""Group tag dicts into an A-Z index keyed by the initial of their pinyin form.

Pipeline (see the ``__main__`` demo at the bottom):

1. ``deal_conversion``      - add a ``conversion`` key to every tag.
2. ``sorted_by_conversion`` - order the tags by that key.
3. ``sort_by_capital``      - bucket the ordered tags under ``A``..``Z`` / ``#``.
"""
import string
from collections import OrderedDict

from xpinyin import Pinyin

# Shared converter instance used by deal_conversion().
pin = Pinyin()

# Bucket labels in the order they appear in the output.
_CAPITAL_LETTERS = string.ascii_uppercase


def deal_conversion(tags):
    """Store the pinyin form of each tag's ``value`` under ``conversion``.

    Every ``value`` is passed through ``Pinyin.get_pinyin``. Judging by the
    sample output at the end of this file, Chinese names become hyphen-joined
    pinyin (``'陈奕迅'`` -> ``'chen-yi-xun'``) while other text comes back
    unchanged.

    The tag dicts are updated **in place**; the returned list contains the
    same dict objects, in the same order as the input.

    :param tags: iterable of tag dicts, each carrying a ``value`` entry.
    :return: a new list holding the (mutated) tag dicts.
    """
    converted = []
    for tag in tags:
        tag["conversion"] = pin.get_pinyin(tag.get("value"))
        converted.append(tag)
    return converted


def sorted_by_conversion(tags):
    """Return the tags ordered by their ``conversion`` string.

    This is a plain (case-sensitive) string sort, so symbols and digits come
    first, then upper-case letters, then lower-case letters.

    :param tags: iterable of tag dicts that all have a ``conversion`` key.
    :return: a new, sorted list; the input is not reordered.
    :raises KeyError: if a tag has no ``conversion`` key.
    """
    return sorted(tags, key=lambda tag: tag["conversion"])


def sort_by_capital(new_tags):
    """Bucket tags by the upper-cased first character of ``conversion``.

    The result has the shape::

        [
            {"class": "",  "tags": [<synthetic "全部" entry>]},
            {"class": "A", "tags": [...]},
            {"class": "B", "tags": [...]},
            ...
            {"class": "#", "tags": [...]},
        ]

    * Letter buckets appear in A-Z order; empty buckets are omitted.
    * Tags whose initial is not an ASCII letter (digits, symbols, accented
      capitals, ...) are collected under ``"#"``, which is placed last and
      only emitted when it is non-empty.
    * Tags with a missing or empty ``conversion`` are left out entirely.
    * Within a bucket, tags keep the order they had in ``new_tags``.

    :param new_tags: iterable of tag dicts, normally already processed by
        ``deal_conversion`` and ``sorted_by_conversion``.
    :return: list of ``{"class": ..., "tags": [...]}`` dicts as described.
    """
    # One list per letter. Dicts preserve insertion order, so iterating
    # ``buckets`` later yields the classes in A-Z order without a sort.
    buckets = {letter: [] for letter in _CAPITAL_LETTERS}
    other = []

    for tag in new_tags:
        conversion = tag.get("conversion")
        if not conversion:
            # Nothing to classify on; such tags are not part of the index.
            continue
        initial = conversion.upper()[0]
        if initial in buckets:
            buckets[initial].append(tag)
        else:
            other.append(tag)

    tag_list = [
        {"class": letter, "tags": grouped}
        for letter, grouped in buckets.items()
        if grouped
    ]

    # Digits and special characters live in a trailing "#" class.
    if other:
        tag_list.append({"class": "#", "tags": other})

    # Every index starts with a synthetic "全部" (all) header entry.
    header = {
        "class": "",
        "tags": [
            {
                "id": "100000000000000000000003",
                "value": "全部",
            }
        ],
    }
    tag_list.insert(0, header)
    return tag_list


if __name__ == '__main__':
    tags = [
        {
            "_id": "5c810376dd40ba638423c623",
            "key": "brand",
            "value": "3.1 Phillip Lim",
            "_type": "brand",
        },
        {
            "_id": "5c810376dd40ba638423c623",
            "key": "star",
            "value": "贾静雯",
            "_type": "star",
        },
        {
            "_id": "5c810376dd40ba638423c623",
            "key": "brand",
            "value": "A Détacher",
            "_type": "brand",
        },
        {
            "_id": "5c810376dd40ba638423c623",
            "key": "brand",
            "value": "Z Peace Treaty",
            "_type": "brand",
        },
        {
            "_id": "5c810376dd40ba638423c623",
            "key": "123 ",
            "value": "#@$",
            "_type": "brand",
        },
        {
            "_id": "5c810376dd40ba638423c623",
            "key": "brand",
            "value": "Spark",
            "_type": "brand",
        },
        {
            "_id": "5c810376dd40ba638423c623",
            "key": "star",
            "value": "陈奕迅",
            "_type": "star",
        },
        {
            "_id": "5c810376dd40ba638423c623",
            "key": "star",
            "value": "林青霞",
            "_type": "star",
        },
        {
            "_id": "5c810376dd40ba638423c623",
            "key": "brand",
            "value": "Hello",
            "_type": "brand",
        },
        {
            "_id": "5c810376dd40ba638423c623",
            "key": "123 ",
            "value": "Hello",
            "_type": "brand",
        },
    ]

    # Convert Chinese names to pinyin; English names stay as they are.
    tags = deal_conversion(tags)
    # print(tags)

    # Sort by the converted value, i.e. special characters, digits,
    # upper-case letters, then lower-case letters.
    tags = sorted_by_conversion(tags)
    # print(tags)

    # Put each tag into the list for its capital letter (digits and special
    # characters go to "other"), then assemble the grouped result.
    tags = sort_by_capital(tags)
    print(tags)

# Expected output:
# [
#     {'class': '', 'tags': [
#         {'id': '100000000000000000000003', 'value': '全部'},
#     ]},
#     {'class': 'A', 'tags': [
#         {'_id': '5c810376dd40ba638423c623', 'key': 'brand',
#          'value': 'A Détacher', '_type': 'brand',
#          'conversion': 'A Détacher'},
#     ]},
#     {'class': 'C', 'tags': [
#         {'_id': '5c810376dd40ba638423c623', 'key': 'star',
#          'value': '陈奕迅', '_type': 'star',
#          'conversion': 'chen-yi-xun'},
#     ]},
#     {'class': 'H', 'tags': [
#         {'_id': '5c810376dd40ba638423c623', 'key': 'brand',
#          'value': 'Hello', '_type': 'brand',
#          'conversion': 'Hello'},
#         {'_id': '5c810376dd40ba638423c623', 'key': '123 ',
#          'value': 'Hello', '_type': 'brand',
#          'conversion': 'Hello'},
#     ]},
#     {'class': 'J', 'tags': [
#         {'_id': '5c810376dd40ba638423c623', 'key': 'star',
#          'value': '贾静雯', '_type': 'star',
#          'conversion': 'jia-jing-wen'},
#     ]},
#     {'class': 'L', 'tags': [
#         {'_id': '5c810376dd40ba638423c623', 'key': 'star',
#          'value': '林青霞', '_type': 'star',
#          'conversion': 'lin-qing-xia'},
#     ]},
#     {'class': 'S', 'tags': [
#         {'_id': '5c810376dd40ba638423c623', 'key': 'brand',
#          'value': 'Spark', '_type': 'brand',
#          'conversion': 'Spark'},
#     ]},
#     {'class': 'Z', 'tags': [
#         {'_id': '5c810376dd40ba638423c623', 'key': 'brand',
#          'value': 'Z Peace Treaty', '_type': 'brand',
#          'conversion': 'Z Peace Treaty'},
#     ]},
#     {'class': '#', 'tags': [
#         {'_id': '5c810376dd40ba638423c623', 'key': '123 ',
#          'value': '#@$', '_type': 'brand',
#          'conversion': '#@$'},
#         {'_id': '5c810376dd40ba638423c623', 'key': 'brand',
#          'value': '3.1 Phillip Lim', '_type': 'brand',
#          'conversion': '3.1 Phillip Lim'},
#     ]},
# ]