Cif2vasp.py
Jump to navigation
Jump to search
go back to Main Page, Group Pages, Núria López and Group, Scripts_for_VASP
This script converts .cif files into VASP 4.x POSCAR FILES Taken from here [1]
Instructions[edit]
- Install atomic simulation environment (ASE):
sudo apt-get update sudo apt-get install python-ase
- Create a file ~/bin/cif2vasp.py
- Make it executable
chmod +x ~/bin/cif2vasp.py
- To run it:
cif2vasp.py xxx.py
- The output file will be named "POSCAR".
The content of ~/bin/cif2vasp.py should be this:
#! /usr/bin/env python # # Inspired by: http://encina.northwestern.edu/index.php/Cif_to_VASP # # See README.md # import sys, os, re import subprocess from optparse import OptionParser #from cctbx import uctbx, sgtbx, crystal #from cctbx import xray #from cctbx import crystal #from cctbx.array_family import flex def usage(): print "Usage: cif2vasp.py [-v] [-f] [-e gulp|ase|cctbx] filename.cif " print "Options" print " -v, --verbose increase verbosity" print " -f try to identify fractional numbers (special positions) " print " from coordinates with less than six decimals." print " -e, --engine the backend to use for the conversion (gulp, ase or cctbx)" def readCifFile(cifFile): from CifFile import CifFile if not os.path.exists(cifFile): raise IOError("CIF file '%s' was not found!" % (cifFile)) cf = CifFile(cifFile) print "------------------------------------------------------------------" if len(cf) != 1: raise StandardError("The cif file contains %i data blocks, while one was expected") # A cif file can contain several "datablocks" that each start # with "data_". cb = cf[cf.keys()[0]] # open the first block AA = float(re.match('([0-9.]*)',cb['_cell_length_a']).group(0)) BB = float(re.match('([0-9.]*)',cb['_cell_length_b']).group(0)) CC = float(re.match('([0-9.]*)',cb['_cell_length_c']).group(0)) alpha = float(cb['_cell_angle_alpha']) beta = float(cb['_cell_angle_beta']) gamma = float(cb['_cell_angle_gamma']) SG = int(cb['_symmetry_Int_Tables_number']) # spacegroup atomTypes = [] atoms = fracOccFound = False firstAtom = True atoms = [] for atom in cb.GetLoop('_atom_site_label'): atomKeys = dir(atom) if '_atom_site_type_symbol' in atomKeys: m = re.match('[a-z]*',atom._atom_site_type_symbol,re.I) atomType = m.group(0) else: m = re.match('[a-z]*',atom._atom_site_label,re.I) atomType = m.group(0) atomLabel = atom._atom_site_label if '_atom_site_occupancy' in atomKeys: occ = float(atom._atom_site_occupancy) if not occ == 1.0: if not fracOccFound: print " " print " WARNING: Fractional occupancy (" + str(occ) +") " \ + "found for atom of type " + atomType + "." fracOccFound = True else: occ = 1.0 # Some crystal structures obtained by neutron diffraction use D for H: if atomType == 'D': atomType = 'H' atomLabel.replace('H','D') if '_atom_site_symmetry_multiplicity' in atomKeys and '_atom_site_Wyckoff_symbol' in atomKeys: atomTypes.append(atomType+' at '+atom._atom_site_symmetry_multiplicity+atom._atom_site_Wyckoff_symbol) else: atomTypes.append(atomType) atomPos = [atom._atom_site_fract_x, atom._atom_site_fract_y, atom._atom_site_fract_z] for p in atomPos: pp = p.split(".") if len(pp) is 2: decimals = p.split(".")[1] if len(decimals) > 3 and len(decimals) < 6 and decimals[0] == decimals[1] and decimals[-1] != "0": print "\n ---------------------\n"\ + " Warning: If the fractional coordinate "+p+" is a recurring decimal, such as 1/3,\n" \ + " then it is necessary to specify this value to six decimal places to be sure of \n" \ + " it being recognised correctly as a spcecial position.\n ------------------" # The coordinates of the atom (_atom_site_fract_x/y/z) may have # a last digit in parenthesis, like "0.6636(7)". Therefore we # extract the part consisting of only digits and a decimal separator: p = re.compile('[0-9.]*'); atomX = float(p.match(atom._atom_site_fract_x).group()) atomY = float(p.match(atom._atom_site_fract_y).group()) atomZ = float(p.match(atom._atom_site_fract_z).group()) #atoms += "%s %f %f %f %f %f\n" % (atomType, atomX, atomY, atomZ, 0.0, occ) atoms.append({'label': atomLabel, 'type': atomType, 'pos': (atomX,atomY,atomZ) }) firstAtom = False if fracOccFound: print " " print "ERROR: Fractional occupancies are not currently supported.\n" exit() print " Atom types: " + ', '.join(atomTypes) return {'spacegroup': SG, 'unit_cell': [AA,BB,CC,alpha,beta,gamma], 'scatterers': atoms} def cif2vaspUsingCCTBX(jobname, ezvasp = False): #print "WARNING: This script does NOT work with files that have fractional occupancies." # os.mkdir(jobname) cif = readCifFile(jobname+'.cif') print "CIF file read successfully:" unit_cell = uctbx.unit_cell(cif['unit_cell']) space_group_info = sgtbx.space_group_info(symbol=cif['spacegroup']) crystal_symmetry = crystal.symmetry(unit_cell=unit_cell,space_group_info=space_group_info) crystal_symmetry.show_summary() print " " #print " Space group:",SG #print " a=%s, b=%s, c=%s, alpha=%s, beta=%s, gamma=%s" % (AA,BB,CC,alpha,beta,gamma) #print cif['scatterers'] scatterers = flex.xray_scatterer() for s in cif['scatterers']: scatterers.append(xray.scatterer(label=s['label'], site=s['pos'])) print print "--------------- icsd_structure ---------------" print icsd_structure = xray.structure(crystal_symmetry=crystal_symmetry, scatterers=scatterers) icsd_structure.show_summary().show_scatterers() #print #icsd_structure.show_distances(distance_cutoff=2.5) print print "--------------- primitive_structure ---------------" print primitive_structure = icsd_structure.primitive_setting() primitive_structure.show_summary().show_scatterers() print print "--------------- p1_structure ---------------" print p1_structure = primitive_structure.expand_to_p1() p1_structure.show_summary().show_scatterers() print print "OK" # Requires existing gulp out file for unit cell def xyz2vaspUsingGULP(jobname, ezvasp = False, verbose = False, auto_fractions = False): # convasp will convert your atom positions into fractional # and produce a file that looks just like a VASP POSCAR: runConvasp( jobName = jobname, gulpOutputFile = jobname + '.gulp.out', xyzFile = jobname+'.xyz', ezvaspStyle = ezvasp ) if ezvasp: prepareEzvaspInput(jobname) sys.stdout.write("Creating INCAR, POSCAR, POTCAR, KPOINTS using ezvasp... ") sys.stdout.flush() os.system("ezvasp -n vasp.in") sys.stdout.write("done\n") else: os.rename(jobname+'.convasp.out','POSCAR') def cif2vaspUsingGULP(jobname, ezvasp = False, verbose = False, auto_fractions = False): prepareGulpInput( cifFile = jobname + '.cif', gulpFile = jobname + '.gulp.in', jobName = jobname, verbose = verbose, auto_fractions = auto_fractions ) runGulp(jobname, verbose) # convasp will convert your atom positions into fractional # and produce a file that looks just like a VASP POSCAR: runConvasp( jobName = jobname, gulpOutputFile = jobname + '.gulp.out', xyzFile = jobname+'.xyz', ezvaspStyle = ezvasp, verbose = verbose ) if ezvasp: prepareEzvaspInput(jobname) sys.stdout.write("Creating INCAR, POSCAR, POTCAR, KPOINTS using ezvasp... ") sys.stdout.flush() os.system("ezvasp -n vasp.in") sys.stdout.write("done\n") else: os.rename(jobname+'.convasp.out','POSCAR') def cif2vaspUsingASE(jobname): """ ASE seems to do an excellent job with reading cif's. It will write out the coordinates in cartesian coordinates. """ from ase import io atoms = io.read(jobname+'.cif') atoms.write('POSCAR', format = 'vasp') def prepareEzvaspInput(jobname): ezvaspIn = open('vasp.in','w') incar = file('INCAR') while True: line = incar.readline() if len(line) == 0: break ezvaspIn.write(line) # notice comma incar.close() ezvaspIn.write('\n[POSCAR]\n') # notice comma poscar = file(jobname + '.convasp.out') lineNo = 0 while True: lineNo += 1 line = poscar.readline() if len(line) == 0: break if lineNo != 6: # ezvasp is not interested in atom counts line ezvaspIn.write(line) poscar.close() ezvaspIn.close() def findLinesContaining(lines, str): enumLines = enumerate(lines) return [k for k, v in enumLines if str in v] # === CONVASP ====================================================================== def runConvasp(gulpOutputFile, xyzFile, jobName, ezvaspStyle = True, verbose = False): """ Example file: mgh2 1 <-- scaling factor (leave as one) 4.516800 0.000000 0.000000 0.000000 4.516800 0.000000 0.000000 0.000000 3.020500 <-- Cartesian lattice vectors (Angstrom) 2 4 <-- number of atoms of each type (2 Mg atoms, 4 H atoms) Cartesian <-- coordinate style 0.000000000 0.000000000 0.000000000 Mg 2.258400000 2.258400000 1.510250000 Mg 1.382140800 1.382140800 0.000000000 H 3.134659200 3.134659200 0.000000000 H 3.640540800 0.876259200 1.510250000 H 0.876259200 3.640540800 1.510250000 H """ xyzfile = open(xyzFile,'r') #read the GULP output files outfile = open(gulpOutputFile,'r') XYZ = xyzfile.readlines() OUT = outfile.readlines() # Find cartesian lattice vectors in Gulp Output file: lineNo = findLinesContaining(OUT, 'Cartesian lattice vectors (Angstroms)')[0]; latVec = [ OUT[lineNo + 2][:-1].split(), OUT[lineNo + 3][:-1].split(), OUT[lineNo + 4][:-1].split() ] # :-1 removes \n # Find cartesian coordinates in xyz file: atoms = [] atomTypes = [] atomCounts = [] atomCount = 0 for lineNo in range(2, len(XYZ)): line = XYZ[lineNo].split() atomType = line[0] if len(line) == 4: # A valid coordinate line should have length 4 if ezvaspStyle: # Include the element name if we are to use EzVasp ... atoms.append("%s %s %s %s" % (line[1],line[2],line[3],atomType)) else: # ... or drop it if we are to use Vasp directly: atoms.append("%s %s %s" % (line[1],line[2],line[3])) if len(atomTypes) == 0: atomTypes.append(atomType) else: if not atomTypes[len(atomTypes)-1] == atomType: # new atom type atomTypes.append(atomType) atomCounts.append(str(atomCount)) atomCount = 0 atomCount += 1 atomCounts.append(str(atomCount)) # store the count of the last atom type if verbose: for i in range(len(atomTypes)): print " found %s atoms of type %s" % (atomCounts[i],atomTypes[i]) convaspInput = [jobname,'1'] convaspInput.extend([' '.join(v) for v in latVec]) convaspInput.extend([' '.join(atomCounts),'Cartesian']) convaspInput.extend(atoms) stdin = '\n'.join(convaspInput) if verbose: print "Converting to POSCAR format using aconvasp... " p = subprocess.Popen(['aconvasp','--direct'], stdin = subprocess.PIPE, stdout = file(jobname+'.convasp.out',"w"), stderr = subprocess.PIPE ).communicate(stdin) if p[1] != "": print "\n" + p[1] print "See "+jobname+".convasp.out for more details" exit() # === GULP ====================================================================== def prepareGulpInput(cifFile, gulpFile, jobName, verbose = False, auto_fractions = False): """ Example file: mgh2 cell 4.5168 4.5168 3.0205 90.0 90.0 90.0 frac Mg 0.0 0.0 0.0 H 0.306 0.306 0.0 space 136 output xyz mgh2 """ from CifFile import CifFile if not os.path.exists(cifFile): raise IOError("CIF file '%s' was not found!" % (cifFile)) cf = CifFile(cifFile) if verbose: print "------------------------------------------------------------------" if len(cf) != 1: raise StandardError("The cif file contains %i data blocks, while one was expected") # A cif file can contain several "datablocks" that each start # with "data_". if verbose: print "Reading data block '%s'..." % (cf.keys()[0]) cb = cf[cf.keys()[0]] # open the first block AA = float(re.match('([0-9.e]*)',cb['_cell_length_a']).group(0)) BB = float(re.match('([0-9.e]*)',cb['_cell_length_b']).group(0)) CC = float(re.match('([0-9.e]*)',cb['_cell_length_c']).group(0)) alpha = float(cb['_cell_angle_alpha']) beta = float(cb['_cell_angle_beta']) gamma = float(cb['_cell_angle_gamma']) # Spacegroup number (1-230) # '_symmetry_Int_Tables_number' has been superseded by '_space_group_IT_number' if '_space_group_IT_number' in cb.keys(): SG = int(cb['_space_group_IT_number']) elif '_symmetry_Int_Tables_number' in cb.keys(): SG = int(cb['_symmetry_Int_Tables_number']) else: print "WARNING: No space group specified. Assuming P1." SG = 1 # CCTBX: #unit_cell = uctbx.unit_cell([AA,BB,CC,alpha,beta,gamma]) #space_group_info = sgtbx.space_group_info(symbol=cb['_symmetry_space_group_name_H-M']) #crystal_symmetry = crystal.symmetry(unit_cell=unit_cell,space_group_info=space_group_info) #print "CIF file read successfully:" #crystal_symmetry.show_summary() if verbose: print " Space group:",SG print " a=%s, b=%s, c=%s, alpha=%s, beta=%s, gamma=%s" % (AA,BB,CC,alpha,beta,gamma) atomTypes = [] atoms = fracOccFound = False firstAtom = True atoms = "" # The coordinates of the atom (_atom_site_fract_x/y/z) may have # a last digit in parenthesis, like "-0.6636(7)". Therefore we # extract the part consisting of only digits and a decimal separator: coordsMatch = re.compile('[0-9.e-]*'); for atom in cb.GetLoop('_atom_site_label'): atomKeys = dir(atom) if '_atom_site_type_symbol' in atomKeys: m = re.match('[a-z]*',atom._atom_site_type_symbol,re.I) atomType = m.group(0) else: m = re.match('[a-z]*',atom._atom_site_label,re.I) atomType = m.group(0) if '_atom_site_occupancy' in atomKeys: occ = float(coordsMatch.match(atom._atom_site_occupancy).group()) if not occ == 1.0: if not fracOccFound: print " " print " WARNING: Fractional occupancy (" + str(occ) +") " \ + "found for atom of type " + atomType + "." fracOccFound = True else: occ = 1.0 # Some crystal structures obtained by neutron diffraction use D for H: if atomType == 'D': atomType = 'H' if '_atom_site_symmetry_multiplicity' in atomKeys and '_atom_site_Wyckoff_symbol' in atomKeys: atomTypes.append(atomType+' at '+atom._atom_site_symmetry_multiplicity+atom._atom_site_Wyckoff_symbol) else: atomTypes.append(atomType) atomX = coordsMatch.match(atom._atom_site_fract_x).group() atomY = coordsMatch.match(atom._atom_site_fract_y).group() atomZ = coordsMatch.match(atom._atom_site_fract_z).group() atomPos = [atomX, atomY, atomZ] for i in range(3): pp = atomPos[i].split(".") if len(pp) is 2: decimals = pp[1] if len(decimals) > 3 and len(decimals) < 6 and decimals[0] == decimals[1] and decimals[0] == decimals[2] and decimals[-1] != "0": if auto_fractions: oldPos = atomPos[i] atomPos[i] = "%.6f" % (float(eval('1.*'+float2fraction(atomPos[i])))) print " Notice: Converted %s into %s" %(oldPos,atomPos[i]) else: print "\n"\ + " ! Warning: The coordinate "+atomPos[i]+" looks similar to the fraction %s, but\n" % float2fraction(atomPos[i]) \ + " ! has insufficient decimals to be recognized as so by GULP. If you want\n" \ + " ! this coordinate to be recognized as a special high-symmetry position,\n" \ + " ! you need to specify at least six digits. If you run cif2vasp with the \n" \ + " ! -f switch, cif2vasp will try to add the necessary decimals automaticly." atoms += "%s %s %s %s %f %f\n" % (atomType, atomPos[0], atomPos[1], atomPos[2], 0.0, occ) firstAtom = False if fracOccFound: print " " print "ERROR: Fractional occupancies are not currently supported.\n" exit() if verbose: print " Atom types: " + ', '.join(atomTypes) gulpFile = open(gulpFile,'w') #Create and write the GULP gulpFile.writelines([jobName+'\n', 'cell\n', '%s %s %s %s %s %s\n' % (AA,BB,CC,alpha,beta,gamma), 'frac\n', atoms, 'space\n', str(SG)+'\n', 'output xyz '+jobName+'\n' ]) # A probably not very robust function to convert a float like "0.333" to a fraction "1/3" def float2fraction(f): f = float(f) num=1. # Start with 1 as numerator den=num/f # Find denominator r=den%1 if abs(r) > 1e-2: # If denominator is decimal fac=1./r num *= fac # Scale numerator.. den *= fac # .. and denominator return "%d/%d" % (round(num),round(den)) def runGulp(jobname, verbose = False): if verbose: print "Prepare primitive cell in xyz format using GULP... " p = subprocess.Popen("gulp", stdin = file(jobname+'.gulp.in'), stdout = file(jobname+'.gulp.out',"w"), stderr = subprocess.PIPE ).communicate() # communicate() returns a tuple (stdoutdata, stderrdata) if p[1] != "": print "\n" + p[1] print "See "+jobname+".gulp.out for more details" exit() if verbose: os.system("grep 'Crystal family' '%s'" % (jobname+'.gulp.out')) os.system("grep 'Space group' '%s'" % (jobname+'.gulp.out')) if __name__ == "__main__": # Parse cmd line args parser = OptionParser( usage = "usage: %prog [options] filename.cif" ) pdf_file = os.path.splitext(os.path.basename(sys.argv[0]))[0] + '.pdf' parser.add_option('-v', '--verbose', action='store_true', dest = 'verbose', default = False, help = 'increase verbosity') parser.add_option('-f', action='store_true', dest = 'auto_fractions', default = False, help = 'try to identify fractional numbers (special positions) from coordinates with less than six decimals.') parser.add_option('-e', '--engine', dest = 'engine', default = 'ase', help = 'the backend to use for the conversion (gulp, ase or cctbx)') (options, args) = parser.parse_args() if len(args) != 1: print "No filename given. Run %s -h for help" % (parser.get_prog_name()) sys.exit(1) filename = args[0] if filename[-4:] == '.xyz': jobname = filename[0:-4] #cif2vaspUsingCCTBX(jobname) xyz2vaspUsingGULP(jobname, verbose = options.verbose, auto_fractions = options.auto_fractions) elif filename[-4:] == '.cif': jobname = filename[0:-4] if options.engine == 'cctbx': cif2vaspUsingCCTBX(jobname) elif options.engine == 'gulp': cif2vaspUsingGULP(jobname, verbose = options.verbose, auto_fractions = options.auto_fractions) elif options.engine == 'ase': cif2vaspUsingASE(jobname) else: print "Error: unknown engine specified." else: print "The input file must have the file-ending '.cif'" usage() sys.exit(2)