python删除重复值、排序、查找最多元素等操作

python与大数据分析

发布于 2022-03-11 16:39:43

7930

发布于 2022-03-11 16:39:43

文章被收录于专栏：python与大数据分析

python删除重复值、排序、查找最多元素等操作

1、删除重复值，主要是列表和集合操作

2、关于排序，主要是对列表、元组、多重列表、集合以及对象排序

3、查找列表中出现最多的元素

# 删除可散列对象重复值，按集合规则顺序排序
def delrepdata(items):
    return set(items)

# 删除可散列对象重复值，元素显示顺序不变
def delrepdatawithnochangeorder(items):
    datas=set()
    for item in items:
        if item not in datas:
            yield item
            datas.add(item)

# 删除不可散列对象重复值，元素显示顺序不变
def delrepdatawithobject(items,key=None):
    datas=set()
    for item in items: #字典对象，item是键名
        var = item if key is None else key(item)  #字典对象，var是键值
        if var not in datas:
            yield item
            datas.add(var)  #字典对象，datas是个列表值的集合

# #找出列表中出现次数最多的元素
def findmosttopn(words,n=3):
    from collections import Counter
    word_counts = Counter(words)
    return word_counts.most_common(n)

class Student:
    def __init__(self, name, grade, age):
        self.name = name
        self.grade = grade
        self.age = age

    def __repr__(self):
        return repr((self.name, self.grade, self.age))

if __name__=='__main__':
    # 删除列表中重复值
    a=[5,1,3,4,6,5,3,3]
    print(list(delrepdatawithnochangeorder(a)))
    # [5, 1, 3, 4, 6]
    print(delrepdata(a))
    # {1, 3, 4, 5, 6}
    a=['a','b','b','c','a']
    print(list(delrepdatawithnochangeorder(a)))
    # ['a', 'b', 'c']
    print(delrepdata(a))
    # {'a', 'c', 'b'}
    print(list(delrepdatawithobject(a)))
    a=[{'x':2,'y':3},{'x':1,'y':4},{'x':2,'y':3},{'x':2,'y':4},{'x':3,'y':6}]
    # print(list(delrepdatawithnochangeorder(a)))
    # TypeError: unhashable type: 'dict'
    print(list(delrepdatawithobject(a,key=lambda a:(a['x'],a['y']))))
    # [{'x': 2, 'y': 3}, {'x': 1, 'y': 4}, {'x': 2, 'y': 4}, {'x': 3, 'y': 6}]

    #找出列表中出现次数最多的元素
    poems=['When','I','do','count','the','clock','that','tells','the','time',
           'And','see','the','brave','day','sunk','in','hideous','night',
           'When','I','behold','the','violet','past','prime',
           'And','sable','curls','all','silverd','oer','with','white',
           'When','lofty','trees','I','see','barren','of','leaves',
           'Which','erst','from','heat','did','canopy','the','herd']
    print(findmosttopn(poems))
    # [('the', 5), ('When', 3), ('I', 3)]
    print(findmosttopn(poems,5))
    # [('the', 5), ('When', 3), ('I', 3), ('And', 2), ('see', 2)]
    from collections import Counter
    word_counts=Counter(poems)
    # Counter.items() 存储各键名和键值项
    # Counter.keys() 存储各键名
    # Counter.values() 存储各键值
    top7=word_counts.most_common(7)
    print(top7)
    # [('the', 5), ('When', 3), ('I', 3), ('And', 2), ('see', 2), ('do', 1), ('count', 1)]

    # ------------对列表进行排序--------------
    alist=[3,4,1,2]
    print(sorted(alist))
    # [1, 2, 3, 4]
    print(sorted(alist,reverse=True))
    # [4, 3, 2, 1]

    # ------------对多重列表进行排序--------------
    alist = [[1,4,5],[5,3,4],[4,5,6]]
    # [[1, 4, 5], [5, 3, 4], [4, 5, 6]]
    print(sorted(alist, key=lambda x: x[0]))
    # [[1, 4, 5], [4, 5, 6], [5, 3, 4]]
    print(sorted(alist, key=lambda x: x[1]))
    # [[5, 3, 4], [1, 4, 5], [4, 5, 6]]
    print(sorted(alist, key=lambda x: x[2]))
    # [[5, 3, 4], [1, 4, 5], [4, 5, 6]]

    # ------------对元祖列表进行排序--------------
    student_tuples = [('john', 'A', 15),('jane', 'B', 12),('dave', 'B', 10)]
    print(sorted(student_tuples, key=lambda student: student[0]))
    # [('dave', 'B', 10), ('jane', 'B', 12), ('john', 'A', 15)]
    print(sorted(student_tuples, key=lambda student: student[1]))
    # [('john', 'A', 15), ('jane', 'B', 12), ('dave', 'B', 10)]
    print(sorted(student_tuples, key=lambda student: student[2]))
    # [('dave', 'B', 10), ('jane', 'B', 12), ('john', 'A', 15)]
    # 指定多参数排序，用()
    print(sorted(student_tuples, key=lambda student: (student[1],student[2])))
    # [('john', 'A', 15), ('dave', 'B', 10), ('jane', 'B', 12)]

    # ------------对字典列表进行排序--------------
    adictlist=[{'x': 2, 'y': 3}, {'x': 1, 'y': 4}, {'x': 2, 'y': 4}, {'x': 3, 'y': 6}]
    print(sorted(adictlist, key=lambda x: x['y']))
    # [{'x': 2, 'y': 3}, {'x': 1, 'y': 4}, {'x': 2, 'y': 4}, {'x': 3, 'y': 6}]

    # ------------对字典进行排序--------------
    adict={'b':12,'a':5,'d':1,'c':2,'e':11,'f':7}
    print(sorted(adict.items(), key=lambda x: (x[1],x[0])))
    # [('d', 1), ('c', 2), ('a', 5), ('f', 7), ('e', 11), ('b', 12)]
    print(sorted(adict.items(), key=lambda x: x[0]))
    # [('a', 5), ('b', 12), ('c', 2), ('d', 1), ('e', 11), ('f', 7)]
    print(sorted(adict.items(), key=lambda x: x[1]))
    # [('d', 1), ('c', 2), ('a', 5), ('f', 7), ('e', 11), ('b', 12)]

    # 对类中字段进行
    students = [Student('john', 'A', 15),Student('jane', 'B', 12),Student('dave', 'B', 10)]
    # [('john', 'A', 15), ('jane', 'B', 12), ('dave', 'B', 10)]
    print(sorted(students,key=lambda x:x.age))
    # [('dave', 'B', 10), ('jane', 'B', 12), ('john', 'A', 15)]

测试

本文参与腾讯云自媒体同步曝光计划，分享自微信公众号。

原始发表：2021-07-05，如有侵权请联系 cloudcommunity@tencent.com 删除

python