typo_comparison

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#!/usr/bin/env python
class ToBigTolerance(Exception):
    """Raised when a requested tolerance is too large for the keyword.

    The offending keyword is kept on ``self.keyword`` so the message can
    report the largest usable value (half of the keyword length).
    """

    def __init__(self, keyword):
        # Forward to Exception so ``args`` is populated (repr, pickling, logging).
        Exception.__init__(self, keyword)
        self.keyword = keyword

    def __str__(self):
        # Floor division keeps the reported limit an integer on Python 2 and 3.
        return 'ToBigTolerance: tolerance must be smaller than half keyword length: %s' % (len(self.keyword) // 2)


def typo_comparison(keyword, string, tolerance):
    """
    Decide whether ``string`` is an acceptable, possibly mistyped, form of ``keyword``.

    No regexp - just regular comparisons of strings, based on the assumption
    that data is rarely typed correctly. Both arguments are lower-cased
    before comparing.

    The meaning of ``tolerance``:

    * ``0`` - the strings must be identical.
    * ``1`` - the strings must have the same length and every letter of
      ``string`` must occur somewhere in ``keyword`` (catches swapped letters).
    * anything else - with ``slack = tolerance - 1``: the lengths may differ
      by at most ``slack``; each letter of ``string`` is looked up in a
      window of ``2 * slack + 1`` keyword letters around its own position
      (the window is shifted so it stays inside ``keyword``); at most
      ``slack`` letters may be missing from their window.

    Returns True on a match, False otherwise.

    Raises ToBigTolerance when ``tolerance - 1`` is larger than half the
    keyword length.
    """
    keyword = keyword.lower()
    string = string.lower()

    if tolerance == 0:
        return keyword == string

    if tolerance == 1:
        return len(string) == len(keyword) and all(
            letter in keyword for letter in string)

    slack = tolerance - 1
    # Floor division so the limit is an integer on Python 2 and 3 alike.
    if slack > len(keyword) // 2:
        raise ToBigTolerance(keyword)

    if abs(len(string) - len(keyword)) > slack:
        return False

    window = 2 * slack + 1
    keyword_length = len(keyword)
    error_count = 0

    for position, letter in enumerate(string):
        low_border = position - slack
        high_border = position + slack + 1

        if high_border >= keyword_length:
            # Window runs past the end: pin it to the tail of the keyword.
            # Clamp at 0 - a negative start would make the slice wrap around
            # and cover only the last letter.
            high_border = keyword_length
            low_border = max(0, high_border - window)
        elif low_border <= 0:
            # Window starts before the beginning: pin it to the head.
            low_border = 0
            high_border = window

        if letter not in keyword[low_border:high_border]:
            error_count += 1

    return error_count <= slack
    
def _basic_compare(keyword, string):
    if keyword == string:
        result = True
           
    else:
        sresult = False
    
    return result

More like this

  1. newforms: Add field-specific error in form.clean() by miracle2k 6 years, 9 months ago
  2. Validate by file content type and size by macmichael01 5 years, 2 months ago
  3. RequiredNullBooleanField by wwu.housing 5 years ago
  4. List all errors in a form +bootstrap highlighting by ibrahimlawal 1 year, 6 months ago
  5. filter dates to user profile's timezone by Scanner 7 years ago

Comments

buriy (on August 23, 2009):

Python snippets are posted on ASPN python cookbook. Here's the place for django snippets. Thanks for attention.

#

rogus (on August 23, 2009):

I use it with django. Draw a border between django and python?

#

(Forgotten your password?)