Corrigés des exercices du chapitre codage des caractères¶

Exercice 1¶

def miroir(chaine):
    """Return the mirror image of a string.

    Parameter: chaine, of type str.
    Returns: a new str made of the characters of chaine in reverse order.
    """
    return ''.join(reversed(chaine))

miroir('Suis-je toujours la plus belle?')

'?elleb sulp al sruojuot ej-siuS'

def palindrome(chaine):
    """Tell whether a string is a palindrome, ignoring space characters.

    Parameter: chaine, of type str.
    Returns: True if chaine, once its spaces are removed, is equal to its
    mirror image as computed by miroir(); False otherwise.
    Cost: the whole string is traversed to build the mirror.
    """
    sans_espaces = chaine.replace(' ', '')
    return sans_espaces == miroir(sans_espaces)

palindrome('caser vite ce palindrome ne mord ni lape cet ivre sac')

True

def palindrome_moitie(chaine):
    """Tell whether a string is a palindrome, ignoring space characters.

    Parameter: chaine, of type str.
    Returns: True if chaine, once its spaces are removed, reads the same
    in both directions; False otherwise.
    Cost: only the first half of the string is compared with the second
    half, and the comparison stops at the first mismatch.
    """
    lettres = chaine.replace(' ', '')
    dernier = len(lettres) - 1
    # Pair each character of the first half with its symmetric counterpart.
    return all(
        lettres[gauche] == lettres[dernier - gauche]
        for gauche in range(len(lettres) // 2)
    )

palindrome_moitie('anna')

True

palindrome_moitie('anana')

True

Exercice 2¶

Table ASCII¶

Les 32 premiers caractères ne sont pas imprimables.

# Print the 128 ASCII characters as 8 rows of 16 characters, every
# character (including the last one of a row) being followed by a space.
for debut in range(0, 128, 16):
    print(*map(chr, range(debut, debut + 16)), end=" \n")

         	 
   
                
  ! " # $ % & ' ( ) * + , - . / 
0 1 2 3 4 5 6 7 8 9 : ; < = > ? 
@ A B C D E F G H I J K L M N O 
P Q R S T U V W X Y Z [ \ ] ^ _ 
` a b c d e f g h i j k l m n o 
p q r s t u v w x y z { | } ~ 

caractere_plus_petit(c1, c2)

def caractere_plus_petit(c1, c2):
    """Compare two characters by code point.

    Parameters: c1 and c2, single-character strings.
    Returns: True if the code point of c1 is less than or equal to the
    code point of c2, False otherwise.
    """
    code1, code2 = ord(c1), ord(c2)
    return code1 <= code2

chaine_plus_petite(chaine1, chaine2)

def chaine_plus_petite(chaine1, chaine2):
    """Compare two strings in lexicographic order.

    Parameters: chaine1 and chaine2, of type str.
    Returns: True if chaine1 comes before chaine2 or is equal to it,
    False otherwise.
    """
    for car1, car2 in zip(chaine1, chaine2):
        if car1 != car2:
            # The first position where the strings differ decides the order.
            return car1 < car2
    # One string is a prefix of the other: the shorter one comes first.
    return len(chaine1) <= len(chaine2)

# Sanity checks for chaine_plus_petite: equal strings, a string that is a
# prefix of the other, and strings that differ at their last character.
assert chaine_plus_petite('a', 'b') == True
assert chaine_plus_petite('a', 'a') == True
assert chaine_plus_petite('a', 'aa') == True
assert chaine_plus_petite('a', 'ab') == True
assert chaine_plus_petite('aa', 'ab') == True
assert chaine_plus_petite('ab', 'aa') == False

def croissant(tab, comparaison):
    """Tell whether a sequence is sorted according to a comparison function.

    Parameters:
        tab: an indexable sequence with a length.
        comparaison: a function of two arguments called on each pair of
            consecutive elements; a true result means the pair is in order.
    Returns: True if every pair of consecutive elements is in order
    (always the case with fewer than two elements), False otherwise.
    """
    return all(
        comparaison(tab[k], tab[k + 1])
        for k in range(len(tab) - 1)
    )

croissant(['Chat', 'Chien', 'Cheval', 'Cochon'], 
          chaine_plus_petite)

False

croissant(['Chien', 'Cheval', 'Cochon', 'Chat'], 
          chaine_plus_petite)

False

croissant(['Cochon', 'Chien', 'Cheval', 'Chat'], 
          chaine_plus_petite)

False

Exercice 3 Codage ROT13¶

def rot13(chaine):
    """Encode a string with ROT13, in uppercase.

    Parameter: chaine, of type str.
    Returns: chaine converted to uppercase, where every letter from A to Z
    is replaced by the letter 13 places further in the alphabet (wrapping
    around after Z). Any other character (space, digit, punctuation,
    accented letter) is kept unchanged, so applying rot13 twice gives back
    the uppercase text.
    """
    origine = ord('A')
    morceaux = []
    for c in chaine.upper():
        if 'A' <= c <= 'Z':
            morceaux.append(chr(origine + (ord(c) - origine + 13) % 26))
        else:
            # Without this branch a space would be turned into 'G'.
            morceaux.append(c)
    return ''.join(morceaux)

rot13('ave cesar')

'NIR PRFNE'

Module `this` le Zen de Python¶

Commençons par importer le module this de Python pour obtenir un court texte résumant la philosophie du langage.

import this

The Zen of Python, by Tim Peters

Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.
Complex is better than complicated.
Flat is better than nested.
Sparse is better than dense.
Readability counts.
Special cases aren't special enough to break the rules.
Although practicality beats purity.
Errors should never pass silently.
Unless explicitly silenced.
In the face of ambiguity, refuse the temptation to guess.
There should be one-- and preferably only one --obvious way to do it.
Although that way may not be obvious at first unless you're Dutch.
Now is better than never.
Although never is often better than *right* now.
If the implementation is hard to explain, it's a bad idea.
If the implementation is easy to explain, it may be a good idea.
Namespaces are one honking great idea -- let's do more of those!

%psource this

Ci-dessous le code source du module, expliquez le code :

s = """Gur Mra bs Clguba, ol Gvz Crgref

Ornhgvshy vf orggre guna htyl.
Rkcyvpvg vf orggre guna vzcyvpvg.
Fvzcyr vf orggre guna pbzcyrk.
Pbzcyrk vf orggre guna pbzcyvpngrq.
Syng vf orggre guna arfgrq.
Fcnefr vf orggre guna qrafr.
Ernqnovyvgl pbhagf.
Fcrpvny pnfrf nera'g fcrpvny rabhtu gb oernx gur ehyrf.
Nygubhtu cenpgvpnyvgl orngf chevgl.
Reebef fubhyq arire cnff fvyragyl.
Hayrff rkcyvpvgyl fvyraprq.
Va gur snpr bs nzovthvgl, ershfr gur grzcgngvba gb thrff.
Gurer fubhyq or bar-- naq cersrenoyl bayl bar --boivbhf jnl gb qb vg.
Nygubhtu gung jnl znl abg or boivbhf ng svefg hayrff lbh'er Qhgpu.
Abj vf orggre guna arire.
Nygubhtu arire vf bsgra orggre guna *evtug* abj.
Vs gur vzcyrzragngvba vf uneq gb rkcynva, vg'f n onq vqrn.
Vs gur vzcyrzragngvba vf rnfl gb rkcynva, vg znl or n tbbq vqrn.
Anzrfcnprf ner bar ubaxvat terng vqrn -- yrg'f qb zber bs gubfr!"""

# Build the ROT13 substitution table: every letter is mapped to the letter
# 13 places further in the 26-letter alphabet, wrapping around at the end.
d = {}
for c in (65, 97):  # 65 == ord('A') and 97 == ord('a'): uppercase, then lowercase
    for i in range(26):
        d[chr(i+c)] = chr((i+13) % 26 + c)

# Decode s: each letter is replaced through d, and any character that is not
# a key of d (space, punctuation, newline) is kept as is by d.get(c, c).
print("".join([d.get(c, c) for c in s]))

Exercice 4¶

def valeur(mot):
    """Return the product of the ranks of the characters of a word.

    Parameter: mot, of type str.
    Returns: the product, over the characters c of mot, of
    ord(c) - ord('a') + 1 (so 'a' counts for 1, 'b' for 2, ... 'z' for 26);
    1 for the empty string.
    """
    decalage = ord('a') - 1
    produit = 1
    for lettre in mot:
        produit *= ord(lettre) - decalage
    return produit

def indice_maximum(tab):
    """Return the index of the maximum of the list of numbers tab.

    When the maximum occurs several times, the index of its first
    occurrence is returned. An empty list gives 0.
    """
    # max() keeps the first index whose element is the largest.
    return max(range(len(tab)), key=lambda k: tab[k], default=0)
    
# histo[i] counts the words of dico.txt whose valeur() is equal to 2000 + i,
# for the 1000 values from 2000 to 2999.
histo = [0] * 1000
# The with statement closes the file even if an exception is raised while
# the words are being read.
with open('dico.txt') as f:
    for mot in f:
        # Each line holds one word; rstrip() removes the trailing newline.
        v = valeur(mot.rstrip())
        if 2000 <= v < 3000:
            histo[v - 2000] += 1
# Value reached by the largest number of words (the first one on ties).
annee_max = 2000 + indice_maximum(histo)
print(annee_max)

2160

Exercice 7¶

Valeur en décimal (base 10) du point de code U+ABCD.

int(0xABCD)

43981

caractère dont le point de code est U+263A

'\U0000263A'

'☺'

# Print the decimal value of U+263A, then the ten characters whose code
# points start there, one per line.
pointcode = 0x263A
print(pointcode)
for _ in range(10):
    print(chr(pointcode))
    pointcode += 1

9786
☺
☻
☼
☽
☾
☿
♀
♁
♂
♃

Le code précédent affiche les dix caractères dont les points de code vont de U+263A (9786 en décimal) inclus à U+263A + 10 = U+2644 (9796 en décimal) exclu.

0x263A

9786

0x263A + 10

9796

print("\U0001f600")

😀

hex(0x1F600 + 79)

'0x1f64f'

# Print the characters U+1F600 to U+1F64F, 16 per row: each character is
# followed by a space, and a newline ends a row after every code point
# whose last hexadecimal digit is F.
for pointcode in range(0x1F600, 0x1F650):
    fin_de_ligne = pointcode % 16 == 15
    print(chr(pointcode), end=' \n' if fin_de_ligne else ' ')

😀 😁 😂 😃 😄 😅 😆 😇 😈 😉 😊 😋 😌 😍 😎 😏 
😐 😑 😒 😓 😔 😕 😖 😗 😘 😙 😚 😛 😜 😝 😞 😟 
😠 😡 😢 😣 😤 😥 😦 😧 😨 😩 😪 😫 😬 😭 😮 😯 
😰 😱 😲 😳 😴 😵 😶 😷 😸 😹 😺 😻 😼 😽 😾 😿 
🙀 🙁 🙂 🙃 🙄 🙅 🙆 🙇 🙈 🙉 🙊 🙋 🙌 🙍 🙎 🙏

Exercice 8¶

Fonction unicode¶

def unicode(s):
    """Print, for each character of a string, its UTF-8 encoding.

    One line is printed per character of s, showing the character, its
    code point in decimal, and the bytes of its UTF-8 encoding written in
    hexadecimal (hex() notation) and in binary (bin() notation).
    """
    gabarit = "Caractère : {} | Point de code  : {} | Octets (hexa) : {} | Octets (binaire) : {}"
    for symbole in s:
        code_utf8 = symbole.encode("utf-8")
        en_hexa = list(map(hex, code_utf8))
        en_binaire = list(map(bin, code_utf8))
        print(gabarit.format(symbole, ord(symbole), en_hexa, en_binaire))

def unicode2(s):
    """Print four comma-separated lists describing a string.

    In order: the characters of s, their code points in decimal, then the
    bytes of the UTF-8 encoding of the whole string in hexadecimal and in
    binary. Every item is followed by a comma and no newline is printed
    after the last list.
    """
    print("Caractères : ")
    print("".join(symbole + "," for symbole in s), end="")
    print("\n\nPoints de code : ")
    print("".join(str(ord(symbole)) + "," for symbole in s), end="")
    code_utf8 = s.encode("utf-8")
    print("\n\nOctets codants en hexadécimal : ")
    print("".join(hex(octet) + "," for octet in code_utf8), end="")
    print("\n\nOctets codants en binaire : ")
    print("".join(bin(octet) + "," for octet in code_utf8), end="")
        
def unicode3(s):
    """Print, for each character of a string, its UTF-8 encoding.

    One line is printed per character of s, showing the character, its
    code point in decimal, and the bytes of its UTF-8 encoding written in
    hexadecimal without prefix and in binary padded to 8 digits.
    """
    gabarit = "Caractère : {} | Point de code  : {} | Octets (hexa) : {} | Octets (binaire) : {}"
    for symbole in s:
        code_utf8 = symbole.encode("utf-8")
        en_hexa = ['{:x}'.format(octet) for octet in code_utf8]
        en_binaire = ['{:08b}'.format(octet) for octet in code_utf8]
        print(gabarit.format(symbole, ord(symbole), en_hexa, en_binaire))

unicode("lycée")

Caractère : l | Point de code  : 108 | Octets (hexa) : ['0x6c'] | Octets (binaire) : ['0b1101100']
Caractère : y | Point de code  : 121 | Octets (hexa) : ['0x79'] | Octets (binaire) : ['0b1111001']
Caractère : c | Point de code  : 99 | Octets (hexa) : ['0x63'] | Octets (binaire) : ['0b1100011']
Caractère : é | Point de code  : 233 | Octets (hexa) : ['0xc3', '0xa9'] | Octets (binaire) : ['0b11000011', '0b10101001']
Caractère : e | Point de code  : 101 | Octets (hexa) : ['0x65'] | Octets (binaire) : ['0b1100101']

unicode2("lycée")

Caractères : 
l,y,c,é,e,

Points de code : 
108,121,99,233,101,

Octets codants en hexadécimal : 
0x6c,0x79,0x63,0xc3,0xa9,0x65,

Octets codants en binaire : 
0b1101100,0b1111001,0b1100011,0b11000011,0b10101001,0b1100101,

unicode3("lycée")

Caractère : l | Point de code  : 108 | Octets (hexa) : ['6c'] | Octets (binaire) : ['01101100']
Caractère : y | Point de code  : 121 | Octets (hexa) : ['79'] | Octets (binaire) : ['01111001']
Caractère : c | Point de code  : 99 | Octets (hexa) : ['63'] | Octets (binaire) : ['01100011']
Caractère : é | Point de code  : 233 | Octets (hexa) : ['c3', 'a9'] | Octets (binaire) : ['11000011', '10101001']
Caractère : e | Point de code  : 101 | Octets (hexa) : ['65'] | Octets (binaire) : ['01100101']

Encodage UTF-8 d'un caractère en hexadécimal, décimal et binaire¶

etoile = chr(8902)

etoile_octet = etoile.encode('utf8')

etoile_octet

b'\xe2\x8b\x86'

# Binary notation (as produced by bin()) of each byte of etoile_octet.
etoile_liste_octet = list(map(bin, etoile_octet))
print(etoile_liste_octet)

['0b11100010', '0b10001011', '0b10000110']

# Iterating over a bytes object yields its bytes as integers.
etoile_liste_decimal = list(etoile_octet)
print(etoile_liste_decimal)

[226, 139, 134]

Nombre de caractères codés par une chaine d'octets en UTF-8¶

def longueur(b):
    """Return the number of characters encoded by a UTF-8 byte string.

    Parameter: b, of type bytes, assumed to hold well-formed UTF-8.
    Returns: the number of encoded characters.

    The leading bits of the first byte of a character give the number of
    bytes used by that character: 0 -> 1 byte, 110 -> 2 bytes,
    1110 -> 3 bytes, anything else -> counted as 4 bytes.
    """
    nb_caracteres = 0
    position = 0
    while position < len(b):
        premier = b[position]
        nb_caracteres += 1
        # Shifting keeps only the leading bits of the 8-bit value.
        if premier >> 7 == 0b0:
            position += 1
        elif premier >> 5 == 0b110:
            position += 2
        elif premier >> 4 == 0b1110:
            position += 3
        else:
            position += 4
    return nb_caracteres

longueur(etoile.encode('utf8'))

1

chaine = 'élémentaire mon cher Watson'.encode('utf8')

longueur('élémentaire mon cher Watson'.encode('utf8'))

27

len('élémentaire mon cher Watson')

27

chaine = 'élémentaire mon cher Watson'.encode('utf8')

type(chaine)

bytes

len(chaine)

29

chaine

b'\xc3\xa9l\xc3\xa9mentaire mon cher Watson'

'1110'.zfill(8)

'00001110'