#!/usr/bin/python
"""
Module for formatting and checking ISBNs (10-digit form).

Based on code from Nelson H. F. Beebe included in bibclean, which seems to
have some relation to isbn.el.
See http://blogs.23.nu/c0re/stories/1416/ for further details.

ISBN checking code is based on a code snippet from Nikita Borisov at
http://www.csclub.uwaterloo.ca/u/nborisov/projects/isbn.html

--md@hudora.de

Changes:
2003-10-29 prefix table updated
           return original ISBN instead of None when we don't know where
           to hyphenate
2003-10-27 initial revision
"""

# Number of characters (digits plus optional trailing 'X') in an ISBN-10.
_ISBN_LENGTH = 10

# NB: We intentionally include the hyphen that separates the
# countrygroupnumber from the publishernumber, in order to improve
# readability, even though this complicates the processing in
# in_ISBN_range(). Given the position of that hyphen, we automatically
# compute the required hyphen positions.
#
# Every entry is a (begin, end) pair of equally long
# "countrygroupnumber-publishernumber" prefixes. The table is scanned in
# order and the FIRST matching entry wins, so the order of entries matters.
#
# This table could be improved using data from other implementations.
# See http://blogs.23.nu/c0re/stories/1416/
# Or, much better, updated using the genuine data from
# http://www.isbn-international.org/identifiers.html
ISBN_range = [
    # Australia, Canada (English), Gibraltar, Ireland, New Zealand,
    # South Africa, Swaziland, United Kingdom, United States of America,
    # Zimbabwe
    ("0-00", "0-19"), ("0-200", "0-699"),
    ("0-7000", "0-8499"), ("0-85000", "0-89999"),
    ("0-900000", "0-949999"), ("0-9500000", "0-9999999"),

    # Australia, Canada (English), Gibraltar, Ireland, New Zealand,
    # South Africa, Swaziland, United Kingdom, United States of America,
    # Zimbabwe
    ("1-00", "1-09"), ("1-100", "1-399"),
    ("1-4000", "1-5499"), ("1-55000", "1-86979"),
    # end was the truncated "1-99899"; matches the same ISBNs as before
    ("1-869800", "1-998999"), ("1-9990000", "1-9999999"),

    # Belgium, Canada (French), France, Luxembourg, Switzerland (French)
    ("2-00", "2-19"), ("2-200", "2-349"),
    ("2-35000", "2-39999"), ("2-495", "2-699"),
    ("2-530", "2-530"), ("2-550", "2-599"),
    ("2-7000", "2-8399"), ("2-84000", "2-89999"),
    ("2-900000", "2-949999"), ("2-9500000", "2-9999999"),

    # Austria, Germany, Switzerland (German)
    ("3-00", "3-02"), ("3-030", "3-033"),
    # begin was "3-034" (three-digit publisher against a four-digit end)
    ("3-0340", "3-0369"),
    # end was "3-03499", i.e. below begin, so the entry could never match
    ("3-03700", "3-03999"),
    ("3-04", "3-19"), ("3-200", "3-699"),
    ("3-7000", "3-8499"), ("3-85000", "3-89999"),
    ("3-900000", "3-949999"), ("3-9500000", "3-9999999"),

    # Japan
    ("4-00", "4-19"), ("4-250", "4-657"),
    ("4-7500", "4-8470"), ("4-87000", "4-89829"),
    ("4-900000", "4-949999"),

    # Azerbaijan, Belarus, Commonwealth of Independent States, Estonia,
    # Georgia, Kazakhstan, Kyrgyzstan, Latvia, Lithuania, Tajikistan,
    # Turkmenistan, Uzbekistan
    ("5-01", "5-12"), ("5-200", "5-690"),
    ("5-7000", "5-8396"), ("5-85001", "5-89996"),
    ("5-900165", "5-900850"),

    # No 6- ISBN assignments

    # China
    ("7-00", "7-04"), ("7-100", "7-314"),
    ("7-5000", "7-5639"), ("7-80000", "7-81035"),
    ("7-900000", "7-900000"),

    # Czechoslovakia (Czech Republic, Slovakia)
    ("80-00", "80-09"), ("80-200", "80-236"),
    ("80-7000", "80-7151"), ("80-85000", "80-85529"),
    ("80-900075", "80-901081"),

    # India
    ("81-200", "81-648"), ("81-7000", "81-7266"),
    ("81-85000", "81-85690"), ("81-900000", "81-900211"),

    # Norway
    ("82-400", "82-599"), ("82-7000", "82-7661"),
    ("82-90000", "82-91769"), ("82-990000", "82-992517"),

    # Poland
    ("83-00", "83-11"), ("83-200", "83-233"),
    ("83-7000", "83-7080"), ("83-85000", "83-85541"),
    ("83-900000", "83-900482"),

    # Spain
    ("84-00", "84-07"), ("84-200", "84-699"),
    ("84-7000", "84-8420"), ("84-85000", "84-89600"),

    # Brazil
    ("85-00", "85-17"), ("85-200", "85-341"),
    ("85-7000", "85-7261"), ("85-85000", "85-85457"),
    ("85-900000", "85-900051"),

    # Bosnia and Herzegovina, Croatia, Macedonia, Slovenia, Yugoslavia
    ("86-03", "86-27"), ("86-301", "86-455"),
    ("86-7001", "86-7861"), ("86-80001", "86-82045"),
    ("86-900941", "86-901289"),

    # Denmark
    ("87-00", "87-20"), ("87-411", "87-611"),
    ("87-7000", "87-7799"), ("87-85001", "87-89796"),
    ("87-980000", "87-997202"),

    # Italy, Switzerland (Italian)
    ("88-00", "88-19"), ("88-200", "88-461"),
    ("88-7000", "88-8402"), ("88-81785", "88-86065"),

    # Republic of Korea
    ("89-0", "89-0"),  # <--no assignments yet

    # Belgium (Flemish), Netherlands
    ("90-00", "90-18"), ("90-200", "90-449"),
    ("90-5000", "90-6999"), ("90-70000", "90-74319"),
    ("90-800000", "90-800868"),

    # Sweden
    ("91-0", "91-1"), ("91-20", "91-48"),
    ("91-500", "91-632"), ("91-7000", "91-7998"),
    ("91-85002", "91-88356"), ("91-970000", "91-992083"),

    # International Publishers (UNESCO)
    ("92-0", "92-5"), ("92-60", "92-76"),
    ("92-800", "92-893"), ("92-9000", "92-9499"),
    ("92-95000", "92-98999"), ("92-990000", "92-999999"),

    # EU
    # The first three begins were "77", "823" and "829": the "92-" group
    # prefix was missing, so they matched (and mis-hyphenated) unrelated
    # ISBNs.
    ("92-77", "92-79"), ("92-823", "92-824"),
    ("92-829", "92-830"), ("92-861", "92-861"),
    ("92-894", "92-899"), ("92-9152", "92-9152"),
    ("92-9155", "92-9157"), ("92-9167", "92-9168"),
    ("92-9181", "92-9181"), ("92-9188", "92-9188"),
    ("92-9191", "92-9193"), ("92-9198", "92-9218"),
    ("92-95007", "92-95011"), ("92-95018", "92-95019"),
    ("92-95022", "92-95022"), ("92-95029", "92-95035"),

    # India
    ("93-0", "93-0"),  # <--no assignments yet

    # Argentina
    ("950-00", "950-47"), ("950-500", "950-795"),
    ("950-9000", "950-9899"), ("950-99000", "950-99949"),

    # Finland
    ("951-0", "951-1"), ("951-20", "951-54"),
    ("951-550", "951-889"), ("951-8900", "951-9498"),
    ("951-95000", "951-96448"),

    # Finland
    ("952-90", "952-90"), ("952-6666", "952-6666"),
    ("952-9500", "952-9714"),

    # Croatia (publisher range unknown)
    ("953-00", "953-99"),

    # Bulgaria
    ("954-0", "954-0"),  # <--no assignments yet

    # Sri Lanka
    ("955-20", "955-28"), ("955-550", "955-616"),
    ("955-9000", "955-9151"), ("955-95000", "955-95444"),

    # Chile
    ("956-10", "956-19"), ("956-200", "956-266"),
    ("956-7000", "956-7205"),

    # Taiwan (Republic of China)
    ("957-05", "957-43"), ("957-500", "957-685"),
    ("957-8500", "957-9699"),

    # Colombia
    ("958-02", "958-32"), ("958-600", "958-656"),
    ("958-9000", "958-9285"), ("958-95001", "958-95343"),

    # Cuba
    ("959-00", "959-13"), ("959-200", "959-216"),
    ("959-7000", "959-7033"),

    # Greece
    ("960-00", "960-12"), ("960-200", "960-433"),
    ("960-7000", "960-8499"), ("960-85000", "960-85203"),

    # Slovenia (publisher range unknown)
    ("961-00", "961-99"),

    # Hong Kong
    ("962-00", "962-19"), ("962-201", "962-474"),
    ("962-7001", "962-7646"),

    # Hungary
    ("963-00", "963-18"), ("963-200", "963-892"),
    # begin was "963-700" (three-digit publisher against a four-digit end)
    ("963-7000", "963-8481"), ("963-85000", "963-85084"),

    # Iran (publisher range unknown)
    ("964-00", "964-99"),

    # Israel
    ("965-01", "965-19"), ("965-207", "965-442"),

    # Ukraine (publisher range unknown)
    ("966-00", "966-99"),

    # Malaysia
    ("967-60", "967-89"), ("967-900", "967-989"),
    ("967-9900", "967-9989"), ("967-99901", "967-99999"),

    # Mexico
    ("968-10", "968-39"), ("968-400", "968-899"),
    ("968-6000", "968-7275"),

    # Pakistan
    ("969-0", "969-1"), ("969-26", "969-39"),
    ("969-400", "969-473"), ("969-8000", "969-8159"),

    # Mexico
    ("970-05", "970-10"), ("970-604", "970-619"),
    ("970-91000", "970-91074"),

    # Philippines
    ("971-06", "971-36"), ("971-500", "971-631"),
    ("971-8500", "971-8819"), ("971-91000", "971-91273"),

    # Portugal
    ("972-0", "972-1"), ("972-20", "972-50"),
    ("972-550", "972-722"), ("972-8004", "972-9499"),
    ("972-95000", "972-97520"),

    # Romania
    ("973-21", "973-49"), ("973-550", "973-682"),
    ("973-9000", "973-9134"), ("973-95000", "973-95521"),

    # Thailand
    ("974-00", "974-10"), ("974-200", "974-685"),
    ("974-7000", "974-8499"), ("974-85000", "974-88000"),

    # Turkey
    ("975-09", "975-19"), ("975-345", "975-554"),
    ("975-7402", "975-7797"), ("975-95384", "975-96606"),

    # Caribbean Community (CARICOM): Antigua, Bahamas, Barbados,
    # Belize, Dominica, Grenada, Guyana, Jamaica, Montserrat, Saint
    # Kitts and Nevis, Saint Lucia, Saint Vincent, Trinidad and Tobago
    ("976-40", "976-42"), ("976-600", "976-636"),
    ("976-8000", "976-8105"),

    # Egypt
    ("977-01", "977-17"), ("977-200", "977-457"),
    ("977-5000", "977-5235"),

    # Nigeria
    ("978-000", "978-199"), ("978-2000", "978-2887"),
    ("978-30000", "978-31118"),

    # Indonesia
    ("979-400", "979-557"), ("979-8000", "979-8322"),

    # Venezuela
    ("980-00", "980-07"), ("980-200", "980-316"),
    ("980-6001", "980-6303"),

    # Singapore
    ("981-00", "981-03"), ("981-200", "981-215"),
    ("981-3000", "981-3099"),

    # South Pacific, Cook Islands, Fiji, Kiribati, Nauru, Niue,
    # Solomon Islands, Tokelau, Tonga, Tuvalu, Vanuatu, Samoa
    ("982-01", "982-03"), ("982-100", "982-500"),

    # Malaysia
    ("983-60", "983-73"), ("983-800", "983-899"),
    ("983-9000", "983-9750"), ("983-99382", "983-99747"),

    # Bangladesh
    ("984-01", "984-30"), ("984-400", "984-556"),
    ("984-8005", "984-8085"),

    # Belarus (publisher range unknown)
    ("985-00", "985-99"),

    # Argentina
    ("987-0", "987-0"),  # <--no assignments yet

    # Libya (publisher range unknown)
    ("9959-0", "9959-9"),

    # Algeria (publisher range unknown)
    ("9961-0", "9961-9"),

    # Panama (publisher range unknown)
    ("9962-0", "9962-9"),

    # Cyprus
    ("9963-0", "9963-1"), ("9963-30", "9963-44"),
    ("9963-550", "9963-599"), ("9963-7500", "9963-7846"),

    # Ghana
    ("9964-0", "9964-6"), ("9964-70", "9964-94"),
    ("9964-950", "9964-990"),

    # Kazakhstan (publisher range unknown)
    ("9965-0", "9965-9"),

    # Kenya
    ("9966-20", "9966-49"), ("9966-830", "9966-882"),
    ("9966-9840", "9966-9874"),

    # Kyrgyzstan (publisher range unknown)
    ("9967-0", "9967-9"),

    # Costa Rica
    ("9968-9702", "9968-9703"),

    # Uganda (publisher range unknown)
    ("9970-0", "9970-9"),

    # Singapore
    ("9971-0", "9971-4"), ("9971-60", "9971-89"),
    ("9971-900", "9971-989"), ("9971-9900", "9971-9924"),

    # Peru (publisher range unknown)
    ("9972-0", "9972-9"),

    # Tunisia
    ("9973-10", "9973-18"), ("9973-700", "9973-925"),
    ("9973-9700", "9973-9918"),

    # Uruguay
    ("9974-0", "9974-0"), ("9974-30", "9974-36"),
    ("9974-550", "9974-580"), ("9974-7500", "9974-7505"),

    # Moldova (publisher range unknown)
    ("9975-0", "9975-9"),

    # Tanzania
    ("9976-1", "9976-5"), ("9976-60", "9976-84"),
    ("9976-900", "9976-988"), ("9976-9990", "9976-9992"),

    # Costa Rica
    ("9977-00", "9977-89"), ("9977-900", "9977-989"),
    ("9977-9900", "9977-9999"),

    # Ecuador
    ("9978-57", "9978-99"), ("9978-951", "9978-971"),
    ("9978-9904", "9978-9910"),

    # Iceland
    ("9979-1", "9979-4"), ("9979-50", "9979-57"),
    ("9979-800", "9979-827"), ("9979-9000", "9979-9067"),

    # Papua New Guinea
    ("9980-0", "9980-1"), ("9980-54", "9980-84"),
    ("9980-900", "9980-915"),

    # Morocco
    ("9981-0", "9981-0"),  # <-- no assignments yet

    # Zambia
    ("9982-00", "9982-30"), ("9982-800", "9982-815"),
    ("9982-9900", "9982-9900"),

    # Gambia
    ("9983-85", "9983-87"), ("9983-9900", "9983-9901"),

    # Latvia (publisher range unknown)
    ("9984-0", "9984-9"),

    # Estonia (publisher range unknown)
    ("9985-0", "9985-9"),

    # Lithuania (publisher range unknown)
    ("9986-0", "9986-9"),

    # Tanzania
    ("9987-30", "9987-30"), ("9987-550", "9987-583"),
    ("9987-8800", "9987-8823"),

    # Ghana
    ("9988-0", "9988-0"),  # <--no assignments yet

    # Macedonia (publisher range unknown)
    ("9989-0", "9989-9"),

    # Mauritius
    ("99903-0", "99903-0"),  # <--no assignments yet

    # Netherlands Antilles
    ("99904-0", "99904-4"), ("99904-60", "99904-65"),
    ("99904-900", "99904-917"),

    # Malawi
    ("99908-11", "99908-29"), ("99908-900", "99908-900"),

    # Malta (publisher range unknown)
    ("99909-0", "99909-9"),

    # Lesotho
    ("99911-00", "99911-32"), ("99911-600", "99911-860"),

    # Botswana
    ("99912-0", "99912-5"), ("99912-60", "99912-79"),
    ("99912-900", "99912-900"),

    # Andorra
    ("99913-0", "99913-0"),  # <--no assignments yet

    # Suriname
    ("99914-0", "99914-4"), ("99914-50", "99914-58"),
    ("99914-900", "99914-927"),

    # Maldives
    ("99915-0", "99915-3"), ("99915-50", "99915-78"),
    ("99915-800", "99915-830"),

    # Namibia
    ("99916-1", "99916-1"), ("99916-30", "99916-35"),
    ("99916-700", "99916-704"),

    # Benin (publisher range unknown)
    ("99919-0", "99919-9"),

    # Andorra
    ("99920-0", "99920-2"), ("99920-50", "99920-55"),
    ("99920-900", "99920-904"),

    # Qatar (publisher range unknown)
    ("99921-0", "99921-9"),

    # Guatemala (publisher range unknown)
    ("99922-0", "99922-9"),

    # Nicaragua (publisher range unknown)
    ("99924-0", "99924-9"),

    # US
    # ( "0-00", "0-19" ), ( "0-200", "0-699" ),
    # ( "0-7000", "0-8499" ), ( "0-85000", "0-89999" ),
    # ( "0-900000", "0-949999" ), ( "0-9500000", "0-9999999" ),
    ("1-55000", "1-86979"), ("1-869800", "1-998999"),
    ("1-9990000", "1-9999999"),
]


def _clean_ISBN(ISBN):
    """Upper-case ISBN and drop the "ISBN" label, hyphens and spaces.

    str.strip("ISBN ") removes any run of the characters I, S, B, N and
    space from BOTH ends, so a leading or a trailing "ISBN" label is
    removed without touching the digits or a final 'X'.
    """
    return ISBN.upper().strip("ISBN ").replace("-", "").replace(" ", "")


def fix_ISBN(ISBN):
    """Reformat an ISBN according to the ISBN standard.

    Returns the hyphenated ISBN when the cleaned input consists of exactly
    ten ISBN digits and its prefix is found in ISBN_range. Otherwise the
    cleaned (upper-cased, label/hyphen/space-free) input is returned
    unchanged, so unknown or malformed input is never mangled.
    """
    cleaned = _clean_ISBN(ISBN)
    # Only a well-formed ISBN-10 can be hyphenated meaningfully; anything
    # else (wrong length, stray characters) is handed back as-is.
    if len(cleaned) != _ISBN_LENGTH or squeeze_ISBN(cleaned) != cleaned:
        return cleaned
    for begin, end in ISBN_range:
        if in_ISBN_range(begin, cleaned, end) == 0:
            return hyphenate_one_ISBN(begin, cleaned)
    return cleaned


def isISBNdigit(c):
    """Return True if c is a decimal digit or the check character X/x."""
    return c.isdigit() or (c == 'X') or (c == 'x')


def skip_non_ISBN_digit(p):
    """Return p with all leading non-ISBN characters removed."""
    while p and not isISBNdigit(p[0]):
        p = p[1:]
    return p


def hyphenate_one_ISBN(prefix, ISBN):
    '''Given a countrygroupnumber-publishernumber prefix, and an ISBN
    optionally containing spaces and hyphens, return a
    properly-hyphenated ISBN or None if the correct number of ISBN
    digits is not found.

    Only the hyphen positions of prefix are used; the digits themselves
    are taken from ISBN.
    '''
    digits = squeeze_ISBN(ISBN)
    if len(digits) != _ISBN_LENGTH:
        return None
    parts = []
    position = 0
    # Each hyphen-separated piece of the prefix dictates how many digits
    # the corresponding part of the ISBN has.
    for piece in prefix.split('-'):
        parts.append(digits[position:position + len(piece)])
        position += len(piece)
    # Whatever lies between the prefix and the check digit is the title
    # number; the check digit always stands alone.
    parts.append(digits[position:-1])
    parts.append(digits[-1])
    return '-'.join(parts)


def in_ISBN_range(begin, ISBN, end):
    '''Compare the countrygroupnumber-publishernumber part of ISBN
    against the range (begin, end), and return -1 (less than), 0 (in
    range), or +1 (greater than).

    begin and end may contain hyphens; ISBN must not. The comparison is
    a string comparison of as many leading characters of ISBN as the
    respective bound has digits.
    '''
    begin = begin.replace("-", "")
    if ISBN[:len(begin)] < begin:
        return -1
    end = end.replace("-", "")
    if end < ISBN[:len(end)]:
        return 1
    return 0


def squeeze_ISBN(in_ISBN):
    """Return in_ISBN, eliminating non-ISBN characters."""
    return ''.join([c for c in in_ISBN if isISBNdigit(c)])


def check_ISBN(ISBN):
    """Return True if called with a valid ISBN-10, otherwise False.

    The input may carry an "ISBN" label, hyphens and spaces. Input that
    does not consist of exactly ten characters, contains anything but
    digits, or has an 'X' anywhere but in the last position is reported
    as invalid instead of raising.
    """
    digits = _clean_ISBN(ISBN)
    if len(digits) != _ISBN_LENGTH:
        return False
    total = 0
    for position, char in enumerate(digits):
        if char == 'X' and position == _ISBN_LENGTH - 1:
            value = 10  # 'X' stands for a check value of ten
        elif char in "0123456789":
            value = int(char)
        else:
            return False
        # Weights run 1..10 from left to right; a valid ISBN-10 has a
        # weighted sum divisible by 11.
        total += (position + 1) * value
    return (total % 11) == 0


if __name__ == "__main__":
    testdata = [
        ("1-59084-369-X", ("159084369X", "1-5-9-0-8-4-3-6-9-X",
                           " 159084369X ", "1 59084 369 X")),
        ("3-929019-57-4", ("isbn 3-929019-57-4", "3929019574")),
        ("1-931836-65-5", ("-1-931836-65-5",)),
        ("3-499-60857-X", ("3499-60857X",)),
        ("1-58113-299-8", ("15-811-329-98",)),
        ("0-7494-2097-9", (" 0 7 4 9 4 2 0 9 7 9",)),
        ("3-472-03370-3", ("isbn 3472033703",)),
        ("0-412-78120-4", ("-0412781204-",)),
        ("3-504-64055-3", ("3504640553",)),
        ("3-7653-1619-9", ("3 7653 1619 9",)),
        ("3-88229-188-5", ("3882291885",)),
        ("3-7719-6378-8", ("3-7719-63788",)),
        ("3-88322-251-8", ("3883222518",)),
        ("3-486-22220-1", ("3486222201",)),
        ("0-690-01743-X", ("0-690-01743-X", "069001743X",
                           "0 690 01743 X")),
        ("0-201-74613-1", ("ISBN 0-201-74613-1", "0201746131")),
        ("0-12-163103-6", (" 0-12-163103-6 ", "0-12-163103-6 ISBN")),
        ("3-89721-204-8", ("3897212048",)),
    ]

    import time

    calls = 0
    start = time.time()
    for right, tests in testdata:
        for isbn in tests:
            got = fix_ISBN(isbn)
            calls += 1
            if right != got:
                # Single-argument print() works on Python 2 and 3 alike.
                print("ERROR: want: %s send: %s got: %s"
                      % (right, isbn, got))
            # Repeat the call to get a measurable timing sample.
            for i in range(79):
                fix_ISBN(isbn)
                calls += 1
    delta = time.time() - start
    print("processing time: %.3fs for %d calls (%.6f per call)"
          % (delta, calls, delta / float(calls)))